preprocessCore/CHANGELOG0000644000126300012640000000514612127132757016362 0ustar00biocbuildphs_compbio1.7.8 ----- Attempt to check if PTHREAD_STACK_MIN is defined 1.7.7 ----- Include on pthreads builds 1.7.5 ----- Fix bug when scale estimate provided to rcModelPLM() 1.7.3/1.7.4 ----------- Fix bug with floating point argument provided where integer expected. Use of non intialized value. 1.7.2 ----- Returned scale estimates are computed using residuals at last iteration rather than n-1 iteration 1.7.1 ----- rcModelPLM, rcModelWPLM now accept an "input.scale" argument (and also return scale value) 1.5.3 ------ fix VECTOR_ELT/STRING_ELT issues 1.5.2 ----- normalize.quantiles.determine.target() and normalize.quantiles.use.target() now have a "subset" argument 1.5.1 ----- Default rcModelPLM() returns both probe coef and se for constraint row (previously SE was not properly returned) 1.3.6 ----- Fix issue with non double arguements to normalize.quantiles.use.target() 1.3.4 ----- Fix memory leak in determine_target Set pthread stack size where appropriate 1.3.3 ----- Fix memory leak in use_target 1.3.2 ----- R_subColSummary functions are now exposed at the C level 1.3.1 ----- rma.background.correct() was not correctly returning value when copy ==TRUE 1.1.9 ----- Commit missing c source files 1.1.8 ----- Commit missing header files 1.1.7 ----- Fix background function bindings (for use by other packages). The implementation themselves is unchanged. 
1.1.6 ----- Addition of PLM-r and PLM-d Adjust rcModelPLM so that it takes optional row-effect estimates quantile normalization functions normalize.quantiles(), normalization.quantiles.determine.target(),normalize.quantiles.use.target() all now have multi-threaded support, user controlled using the R_THREADS environment variable Move weightedkerneldensity.c from affyPLM to preprocessCore subColSummarize* functions all now have basic multi-threaded support, user controlled using the R_THREADS rma background correction method (also multi-threaded) 1.1.5 ----- The subColSummarize* functions now return a matrix with rownames. Now it is clear which rows of the resulting summary matrix correspond to which values of the group.labels variable. This helps clarify the previous situation where it done alphabetically. 1.1.3 ----- Fix broken Makevars.in 1.1.2 ----- Add missing Makevars.in 1.1.1 ----- Add experimental support for pthreads based multi-threaded quantile normalization via code contributed by Paul Gordon This is only implemented for the RMA quantile normalization (ie accessible via calling the rma() function. The number of threads is user controlled by setting the R_THREADS environment variable preprocessCore/DESCRIPTION0000644000126300012640000000103012127220006016623 0ustar00biocbuildphs_compbioPackage: preprocessCore Version: 1.22.0 Title: A collection of pre-processing functions Author: Benjamin Milo Bolstad Maintainer: Benjamin Milo Bolstad Depends: methods Imports: stats Description: A library of core preprocessing routines License: LGPL (>= 2) Collate: normalize.quantiles.R quantile_extensions.R rma.background.correct.R rcModel.R colSummarize.R subColSummarize.R plmr.R plmd.R LazyLoad: yes biocViews: Infrastructure Packaged: 2013-04-04 06:39:02 UTC; biocbuild preprocessCore/NAMESPACE0000644000126300012640000000017712127132757016366 0ustar00biocbuildphs_compbiouseDynLib("preprocessCore") importFrom(stats, var) ##export everything that does not start with a . 
exportPattern("^[^\\.]") preprocessCore/R/0000755000126300012640000000000012127132753015337 5ustar00biocbuildphs_compbiopreprocessCore/R/colSummarize.R0000644000126300012640000000652312127132753020142 0ustar00biocbuildphs_compbio## ## file: colSummarize.R ## ## Author: B. M. Bolstad ## ## History ## Sept 15, 2007 - Initial verison ## colSummarizeAvgLog <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_avg_log",y,PACKAGE="preprocessCore") } colSummarizeLogAvg <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_log_avg",y,PACKAGE="preprocessCore") } colSummarizeAvg <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_avg",y,PACKAGE="preprocessCore") } colSummarizeLogMedian <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_log_median",y,PACKAGE="preprocessCore") } colSummarizeMedianLog <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_median_log",y,PACKAGE="preprocessCore") } colSummarizeMedian <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else 
if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_median",y,PACKAGE="preprocessCore") } colSummarizeBiweightLog <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_biweight_log",y,PACKAGE="preprocessCore") } colSummarizeBiweight <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_biweight",y,PACKAGE="preprocessCore") } colSummarizeMedianpolishLog <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_medianpolish_log",y,PACKAGE="preprocessCore") } colSummarizeMedianpolish <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") .Call("R_colSummarize_medianpolish",y,PACKAGE="preprocessCore") } preprocessCore/R/normalize.quantiles.R0000644000126300012640000000660012127132753021470 0ustar00biocbuildphs_compbio################################################################## ## ## file: normalize.quantiles.R ## ## For a description of quantile normalization method see ## ## Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003)(2003) ## A Comparison of Normalization Methods for High ## Density Oligonucleotide Array Data Based on Bias and Variance. 
## Bioinformatics 19,2,pp 185-193 ## ## History ## Pre Aug 23, 2003 Two years worth of stuff ## Aug 23, 2003 - Added use.log2 to "robust", ## added ability to pass additional parameters ## to normalize.AffyBatch.Quantiles.robust ## changed pmonly parameters on functions ## so that it is now a string argument "type" ## the options are pmonly, mmonly, together, separate ## Jan 31, 2004 - put a check for an integer matrix and force coercision to ## doubles if required in normalize.quantiles ## Mar 13, 2005 - Modifications to normalize.quantiles.robust including removing ## approx.method which never got implemented. Making it a use a .Call() ## rather than a .C() ## ## Sep 20, 2006 - fix .Call in normalize.quantiles.robust ## May 20, 2007 - port to preprocessCore. Remove anything to do with AffyBatch Objects ## ################################################################## normalize.quantiles <- function(x,copy=TRUE){ rows <- dim(x)[1] cols <- dim(x)[2] if (!is.matrix(x)){ stop("Matrix expected in normalize.quantiles") } if (is.integer(x)){ x <- matrix(as.double(x),rows,cols) copy <- FALSE } #matrix(.C("qnorm_c", as.double(as.vector(x)), as.integer(rows), as.integer(cols))[[1]], rows, cols) ## .Call("R_qnorm_c",x,copy, PACKAGE="preprocessCore"); .Call("R_qnorm_c_handleNA",x,copy, PACKAGE="preprocessCore"); } normalize.quantiles.robust <- function(x,copy=TRUE,weights=NULL,remove.extreme=c("variance","mean","both","none"),n.remove=1,use.median=FALSE,use.log2=FALSE){ calc.var.ratios <- function(x){ cols <- dim(x)[2] vars <- apply(x,2,var) results <- matrix(0,cols,cols) for (i in 1:cols-1) for (j in (i+1):cols){ results[i,j] <- vars[i]/vars[j] results[j,i] <- vars[j]/vars[i] } results } calc.mean.dists <- function(x){ cols <- dim(x)[2] means <- colMeans(x) results <- matrix(0,cols,cols) for (i in 1:cols-1) for (j in (i+1):cols){ results[i,j] <- means[i] - means[j] results[j,i] <- means[j] - means[i] } results } use.huber <- FALSE remove.extreme <- 
match.arg(remove.extreme) rows <- dim(x)[1] cols <- dim(x)[2] if (is.null(weights)){ weights <- .Call("R_qnorm_robust_weights",x,remove.extreme,as.integer(n.remove),PACKAGE="preprocessCore") } else { if (is.numeric(weights)){ if (length(weights) != cols){ stop("Weights vector incorrect length\n") } if (sum(weights > 0) < 1){ stop("Need at least one non negative weights\n") } if (any(weights < 0)){ stop("Can't have negative weights") } } else { if (weights =="huber"){ use.huber <- TRUE weights <- rep(1,cols) } else { stop("Don't recognise weights argument as valid.") } } } .Call("R_qnorm_robust_c",x,copy,weights,as.integer(use.median),as.integer(use.log2),as.integer(use.huber),PACKAGE="preprocessCore") } preprocessCore/R/plmd.R0000644000126300012640000000112612127132753016416 0ustar00biocbuildphs_compbiorcModelPLMd <- function(y,group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (length(group.labels) != ncol(y)){ stop("group labels is of incorrect length") } if (!is.factor(group.labels)){ group.labels <- as.factor(group.labels) } if (any(table(group.labels) < 2)){ stop("Must be at least two arrays in each group") } group.int <- as.integer(group.labels) -1 PsiCode <- 0 PsiK <- 1.345 .Call("R_plmd_model",y,PsiCode,PsiK,as.integer(group.int),as.integer(length(unique(group.labels))),PACKAGE="preprocessCore") } preprocessCore/R/plmr.R0000644000126300012640000000413012127132753016432 0ustar00biocbuildphs_compbiorcModelPLMr <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") PsiCode <- 0 PsiK <- 1.345 .Call("R_plmr_model",y,PsiCode,PsiK,PACKAGE="preprocessCore") } rcModelPLMrr <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") PsiCode <- 0 PsiK <- 1.345 .Call("R_plmrr_model",y,PsiCode,PsiK,PACKAGE="preprocessCore") } rcModelPLMrc <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") PsiCode <- 0 PsiK <- 1.345 .Call("R_plmrc_model",y,PsiCode,PsiK,PACKAGE="preprocessCore") } rcModelWPLMr <- function(y, w){ 
if (!is.matrix(y)) stop("argument should be matrix") if (is.vector(w)){ if (length(w) != prod(dim(y))){ stop("weights are not correct length") } } else if (is.matrix(w)){ if (!all(dim(w) == dim(y))){ stop("weights should be same dimension as input matrix") } } if (any(w < 0)){ stop("weights should be non negative") } PsiCode <- 0 PsiK <- 1.345 .Call("R_wplmr_model",y,PsiCode,PsiK,as.double(w),PACKAGE="preprocessCore") } rcModelWPLMrr <- function(y, w){ if (!is.matrix(y)) stop("argument should be matrix") if (is.vector(w)){ if (length(w) != prod(dim(y))){ stop("weights are not correct length") } } else if (is.matrix(w)){ if (!all(dim(w) == dim(y))){ stop("weights should be same dimension as input matrix") } } if (any(w < 0)){ stop("weights should be non negative") } PsiCode <- 0 PsiK <- 1.345 .Call("R_wplmrr_model",y,PsiCode,PsiK,as.double(w),PACKAGE="preprocessCore") } rcModelWPLMrc <- function(y, w){ if (!is.matrix(y)) stop("argument should be matrix") if (is.vector(w)){ if (length(w) != prod(dim(y))){ stop("weights are not correct length") } } else if (is.matrix(w)){ if (!all(dim(w) == dim(y))){ stop("weights should be same dimension as input matrix") } } if (any(w < 0)){ stop("weights should be non negative") } PsiCode <- 0 PsiK <- 1.345 .Call("R_wplmrc_model",y,PsiCode,PsiK,as.double(w),PACKAGE="preprocessCore") } preprocessCore/R/quantile_extensions.R0000644000126300012640000000436612127132753021574 0ustar00biocbuildphs_compbio normalize.quantiles.determine.target <- function(x,target.length=NULL,subset=NULL){ if (!is.matrix(x)){ stop("This function expects supplied argument to be matrix") } if (!is.numeric(x)){ stop("Supplied argument should be a numeric matrix") } rows <- dim(x)[1] cols <- dim(x)[2] if (is.integer(x)){ x <- matrix(as.double(x), rows, cols) } if (is.null(target.length)){ target.length <- rows } if (target.length <= 0){ stop("Need positive length for target.length") } if (is.null(subset)){ 
return(.Call("R_qnorm_determine_target",x,target.length,PACKAGE="preprocessCore")) } else { if (length(subset) != rows){ stop("subset should have same length as nrows(x)") } subset <- as.integer(subset) return(.Call("R_qnorm_determine_target_via_subset",x, subset,target.length,PACKAGE="preprocessCore")) } } normalize.quantiles.use.target <- function(x,target,copy=TRUE,subset=NULL){ if (!is.matrix(x)){ stop("This function expects supplied argument to be matrix") } if (!is.numeric(x)){ stop("Supplied argument should be a numeric matrix") } rows <- dim(x)[1] cols <- dim(x)[2] if (is.integer(x)){ x <- matrix(as.double(x), rows, cols) } if (!is.vector(target)){ stop("This function expects target to be vector") } if (!is.numeric(target)){ stop("Supplied target argument should be a numeric vector") } if (is.integer(target)){ target <- as.double(target) } if (is.null(subset)){ return(.Call("R_qnorm_using_target",x,target,copy,PACKAGE="preprocessCore")) } else { if (length(subset) != rows){ stop("subset should have same length as nrows(x)") } subset <- as.integer(subset) return(.Call("R_qnorm_using_target_via_subset",x, subset, target, copy, PACKAGE="preprocessCore")) } } normalize.quantiles.in.blocks <- function(x,blocks,copy=TRUE){ rows <- dim(x)[1] cols <- dim(x)[2] if (rows != length(blocks)){ stop("blocks is not vector of correct length") } if (is.factor(blocks)){ blocks <- as.integer(blocks) } if (!is.numeric(blocks)){ stop("non-numeric vector used for blocks") } return(.Call("R_qnorm_within_blocks",x,blocks,copy,PACKAGE="preprocessCore")) } preprocessCore/R/rcModel.R0000644000126300012640000000640512127132753017054 0ustar00biocbuildphs_compbio rcModelPLM <- function(y,row.effects=NULL, input.scale=NULL){ if (!is.matrix(y)) stop("argument should be matrix") PsiCode <- 0 PsiK <- 1.345 if (is.null(row.effects)){ .Call("R_rlm_rma_default_model",y,PsiCode,PsiK,input.scale,PACKAGE="preprocessCore") } else { if (length(row.effects) != nrow(y)){ stop("row.effects parameter 
should be same length as number of rows") } if (abs(sum(row.effects)) > 10*.Machine$double.eps){ stop("row.effects should sum to zero") } .Call("R_rlm_rma_given_probe_effects",y,as.double(row.effects),PsiCode,PsiK,input.scale,PACKAGE="preprocessCore") } } rcModelWPLM <- function(y, w, row.effects=NULL, input.scale=NULL){ if (!is.matrix(y)) stop("argument should be matrix") if (is.vector(w)){ if (length(w) != prod(dim(y))){ stop("weights are not correct length") } } else if (is.matrix(w)){ if (!all(dim(w) == dim(y))){ stop("weights should be same dimension as input matrix") } } if (any(w < 0)){ stop("weights should be no negative") } PsiCode <- 0 PsiK <- 1.345 if (is.null(row.effects)){ .Call("R_wrlm_rma_default_model",y,PsiCode,PsiK,as.double(w),input.scale,PACKAGE="preprocessCore") } else { if (length(row.effects) != nrow(y)){ stop("row.effects parameter should be same length as number of rows") } if (abs(sum(row.effects)) > 10*.Machine$double.eps){ stop("row.effects should sum to zero") } .Call("R_wrlm_rma_given_probe_effects",y,as.double(row.effects),PsiCode,PsiK,as.double(w),input.scale,PACKAGE="preprocessCore") } } rcModelMedianPolish <- function(y){ if (!is.matrix(y)) stop("argument should be matrix") PsiCode <- 0 PsiK <- 1.345 .Call("R_medianpolish_rma_default_model",y,PACKAGE="preprocessCore") } subrcModelMedianPolish <- function(y,group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_sub_rcModelSummarize_medianpolish", y, rowIndexList,PACKAGE="preprocessCore") names(x) <- names(rowIndexList) x } subrcModelPLM <- function(y,group.labels,row.effects=NULL, input.scale=NULL){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) 
stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) PsiCode <- 0 PsiK <- 1.345 if (is.null(row.effects)){ x <- .Call("R_sub_rcModelSummarize_plm", y, rowIndexList, PsiCode, PsiK, input.scale,PACKAGE="preprocessCore") names(x) <- names(rowIndexList) x } else { stop("row.effects not yet implemented for subrcModelPLM") if (length(row.effects) != nrow(y)){ stop("row.effects parameter should be same length as number of rows") } if (abs(sum(row.effects)) > 10*.Machine$double.eps){ stop("row.effects should sum to zero") } .Call("R_rlm_rma_given_probe_effects",y,as.double(row.effects),PsiCode,PsiK,input.scale,PACKAGE="preprocessCore") } } preprocessCore/R/rma.background.correct.R0000644000126300012640000000155012127132753022020 0ustar00biocbuildphs_compbio################################################################## ## ## file: rma.background.correct.R ## ## implements the normal boackground + exponential signal background ## correction traditionally used in RMA computations ## ## for more details see ## Bolstad, BM (2004) Low Level Analysis of High-density Oligonucleotide Array Data: Background, Normalization and Summarization. Dissertation. University of California, Berkeley. ## pages 17-21 ## ## ## History ## Mar 22, 2008 - Initial version (in preprocessCore) ## ## rma.background.correct <- function(x,copy=TRUE){ rows <- dim(x)[1] cols <- dim(x)[2] if (!is.matrix(x)){ stop("Matrix expected in normalize.quantiles") } if (is.integer(x)){ x <- matrix(as.double(x),rows,cols) copy <- FALSE } .Call("R_rma_bg_correct", x, copy, PACKAGE="preprocessCore"); } preprocessCore/R/subColSummarize.R0000644000126300012640000001163312127132753020612 0ustar00biocbuildphs_compbio## ## file: subColSummarize.R ## ## Author: B. M. 
Bolstad ## ## History ## Sept 18, 2007 - Initial verison ## Dec 10, 2007 - add rownames to output ## convert.group.labels <- function(group.labels){ if (!is.factor(group.labels)) group.labels <- as.factor(group.labels) split(0:(length(group.labels) -1),group.labels) } subColSummarizeAvgLog <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_avg_log", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeLogAvg <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_log_avg", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeAvg <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_avg", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeBiweightLog <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_biweight_log", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeBiweight 
<- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_biweight", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeMedianLog <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_median_log", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeLogMedian <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_log_median", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeMedian <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_median", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeMedianpolishLog <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- 
convert.group.labels(group.labels) x <- .Call("R_subColSummarize_medianpolish_log", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } subColSummarizeMedianpolish <- function(y, group.labels){ if (!is.matrix(y)) stop("argument should be matrix") if (!is.double(y) & is.numeric(y)) y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) else if (!is.numeric(y)) stop("argument should be numeric matrix") rowIndexList <- convert.group.labels(group.labels) x <- .Call("R_subColSummarize_medianpolish", y, rowIndexList, PACKAGE="preprocessCore") rownames(x) <- names(rowIndexList) x } preprocessCore/configure0000755000126300012640000037656412127132757017076 0ustar00biocbuildphs_compbio#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.68. # # # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software # Foundation, Inc. # # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. 
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi " as_required="as_fn_return () { (exit \$1); } as_fn_success () { as_fn_return 0; } as_fn_failure () { as_fn_return 1; } as_fn_ret_success () { return 0; } as_fn_ret_failure () { return 1; } exitcode=0 as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : else exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test \$(( 1 + 1 )) = 2 || exit 1" if (eval "$as_required") 2>/dev/null; then : as_have_required=yes else as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. as_shell=$as_dir/$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : CONFIG_SHELL=$as_shell as_have_required=yes if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : break 2 fi fi done;; esac as_found=false done $as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : CONFIG_SHELL=$SHELL as_have_required=yes fi; } IFS=$as_save_IFS if test "x$CONFIG_SHELL" != x; then : # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. 
BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV export CONFIG_SHELL case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"} fi if test x$as_have_required = xno; then : $as_echo "$0: This script requires a shell more modern than all" $as_echo "$0: the shells that I found on your system." if test x${ZSH_VERSION+set} = xset ; then $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" $as_echo "$0: be upgraded to zsh 4.3.4 or later." else $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." fi exit 1 fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} export SHELL # Unset more variables known to interfere with behavior of common tools. CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS ## --------------------- ## ## M4sh Shell Functions. ## ## --------------------- ## # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. 
as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. 
as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_lineno_1=$LINENO as_lineno_1a=$LINENO as_lineno_2=$LINENO as_lineno_2a=$LINENO eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . 
"./$as_me.lineno" # Exit status is that of the last command. exit } ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in #( -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. 
ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= # Identity of this package. PACKAGE_NAME= PACKAGE_TARNAME= PACKAGE_VERSION= PACKAGE_STRING= PACKAGE_BUGREPORT= PACKAGE_URL= ac_unique_file=""DESCRIPTION"" # Factoring default headers for most tests. ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_subst_vars='LTLIBOBJS LIBOBJS EGREP GREP CPP OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_URL PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CPP' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. 
# These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *=) ac_optarg= ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. 
case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? 
"invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | 
--mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | 
--program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | 
--targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. 
with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) as_fn_error $? "unrecognized option: \`$ac_option' Try \`$0 --help' for more information" ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. 
case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host. If a cross compiler is detected then cross compile mode will be used" >&2 elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! 
-r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures this package to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking ...' 
messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF _ACEOF fi if test -n "$ac_init_help"; then cat <<\_ACEOF Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. 
-L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to the package provider. _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. 
if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF configure generated by GNU Autoconf 2.68 Copyright (C) 2010 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi ## ------------------------ ## ## Autoconf initialization. ## ## ------------------------ ## # ac_fn_c_try_compile LINENO # -------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile # ac_fn_c_try_cpp LINENO # ---------------------- # Try to preprocess conftest.$ac_ext, and return whether this succeeded. 
ac_fn_c_try_cpp () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } > conftest.i && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_cpp # ac_fn_c_try_link LINENO # ----------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || $as_test_x conftest$ac_exeext }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_link # ac_fn_c_try_run LINENO # ---------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. Assumes # that executables *can* be run. ac_fn_c_try_run () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; }; then : ac_retval=0 else $as_echo "$as_me: program exited with status $ac_status" >&5 $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=$ac_status fi rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_run # ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists, giving a warning if it cannot be compiled using # the include files in INCLUDES and setting the cache variable VAR # accordingly. ac_fn_c_check_header_mongrel () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if eval \${$3+:} false; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } else # Is the header compilable? { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 $as_echo_n "checking $2 usability... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_header_compiler=yes else ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 $as_echo "$ac_header_compiler" >&6; } # Is the header present? { $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 $as_echo_n "checking $2 presence... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include <$2> _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : ac_header_preproc=yes else ac_header_preproc=no fi rm -f conftest.err conftest.i conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 $as_echo "$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( yes:no: ) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 $as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} ;; no:yes:* ) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 $as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5 $as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 $as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 $as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... 
" >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else eval "$3=\$ac_header_compiler" fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_mongrel # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists and can be compiled using the include files in # INCLUDES, setting the cache variable VAR accordingly. ac_fn_c_check_header_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 $as_echo_n "checking for $2... " >&6; } if eval \${$3+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $4 #include <$2> _ACEOF if ac_fn_c_try_compile "$LINENO"; then : eval "$3=yes" else eval "$3=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi eval ac_res=\$$3 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno } # ac_fn_c_check_header_compile cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by $as_me, which was generated by GNU Autoconf 2.68. Invocation command line was $ $0 $@ _ACEOF exec 5>>config.log { cat <<_ASUNAME ## --------- ## ## Platform. 
## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` /usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. $as_echo "PATH: $as_dir" done IFS=$as_save_IFS } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. 
ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; 2) as_fn_append ac_configure_args1 " '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi as_fn_append ac_configure_args " '$ac_arg'" ;; esac done done { ac_configure_args0=; unset ac_configure_args0;} { ac_configure_args1=; unset ac_configure_args1;} # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo $as_echo "## ---------------- ## ## Cache variables. 
## ## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo $as_echo "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then $as_echo "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then $as_echo "## ----------- ## ## confdefs.h. 
## ## ----------- ##" echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h $as_echo "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_URL "$PACKAGE_URL" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then # We do not want a PATH search for config.site. case $CONFIG_SITE in #(( -*) ac_site_file1=./$CONFIG_SITE;; */*) ac_site_file1=$CONFIG_SITE;; *) ac_site_file1=./$CONFIG_SITE;; esac elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . 
"$ac_site_file" \ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. 
ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) as_fn_append ac_configure_args " '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. 
## ## -------------------- ## ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then echo "could not determine R_HOME" exit 1 fi CC=`"${R_HOME}/bin/R" CMD config CC` CPP=`"${R_HOME}/bin/R" CMD config CPP` CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS` ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="${ac_tool_prefix}gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. 
set dummy gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. 
shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 $as_echo_n "checking whether the C compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { { ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. 
break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi if test -z "$ac_file"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 $as_echo_n "checking for C compiler default output file name... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if ${ac_cv_objext+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if ${ac_cv_c_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if ${ac_cv_prog_cc_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes else CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if ${ac_cv_prog_cc_c89+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #include #include /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. 
The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac if test "x$ac_cv_prog_cc_c89" != xno; then : fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo 
"$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 $as_echo_n "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if ${ac_cv_prog_CPP+:} false; then : $as_echo_n "(cached) " >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 $as_echo "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. 
# On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # Checks for libraries. use_pthreads=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_create" >&5 $as_echo_n "checking for library containing pthread_create... " >&6; } if ${ac_cv_search_pthread_create+:} false; then : $as_echo_n "(cached) " >&6 else ac_func_search_save_LIBS=$LIBS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char pthread_create (); int main () { return pthread_create (); ; return 0; } _ACEOF for ac_lib in '' pthread; do if test -z "$ac_lib"; then ac_res="none required" else ac_res=-l$ac_lib LIBS="-l$ac_lib $ac_func_search_save_LIBS" fi if ac_fn_c_try_link "$LINENO"; then : ac_cv_search_pthread_create=$ac_res fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext if ${ac_cv_search_pthread_create+:} false; then : break fi done if ${ac_cv_search_pthread_create+:} false; then : else ac_cv_search_pthread_create=no fi rm conftest.$ac_ext LIBS=$ac_func_search_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_pthread_create" >&5 $as_echo "$ac_cv_search_pthread_create" >&6; } ac_res=$ac_cv_search_pthread_create if test "$ac_res" != no; then : test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" use_pthreads=yes fi # Checks for header files. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if ${ac_cv_path_GREP+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in grep ggrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue # Check for GNU ac_path_GREP and select it if it is found. 
# Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_GREP"; then as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_GREP=$GREP fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 $as_echo "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 $as_echo_n "checking for egrep... " >&6; } if ${ac_cv_path_EGREP+:} false; then : $as_echo_n "(cached) " >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_prog in egrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_EGREP=$EGREP fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 $as_echo "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include #include #include #include int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdc=yes else ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : : else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : else ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default " if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in stdlib.h do : ac_fn_c_check_header_mongrel "$LINENO" "stdlib.h" "ac_cv_header_stdlib_h" "$ac_includes_default" if test "x$ac_cv_header_stdlib_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_STDLIB_H 1 _ACEOF fi done if test "x${have_pthreads}" = xyes; then for ac_header in pthread.h do : ac_fn_c_check_header_mongrel "$LINENO" "pthread.h" "ac_cv_header_pthread_h" "$ac_includes_default" if test "x$ac_cv_header_pthread_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_PTHREAD_H 1 _ACEOF else use_pthreads=no fi done fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking if PTHREAD_STACK_MIN is defined" >&5 $as_echo_n "checking if PTHREAD_STACK_MIN is defined... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include #include int main () {size_t stacksize = PTHREAD_STACK_MIN + 0x4000; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : use_pthread_stack_min=yes else use_pthread_stack_min=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $use_pthread_stack_min" >&5 $as_echo "$use_pthread_stack_min" >&6; } if test "x$use_pthread_stack_min" = xno; then use_pthreads=no fi if test "x${use_pthreads}" = "xno"; then echo "------------------------------------------" echo " Unable to find pthreads on this system. " echo " Building a single-threaded version. " echo "------------------------------------------" fi if test "x${use_pthreads}" = "xyes"; then $as_echo "#define USE_PTHREADS 1" >>confdefs.h fi ac_config_files="$ac_config_files src/Makevars" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. 
( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else case $cache_file in #( */* | ?:*) mv -f confcache "$cache_file"$$ && mv -f "$cache_file"$$ "$cache_file" ;; #( *) mv -f confcache "$cache_file" ;; esac fi fi else { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Transform confdefs.h into DEFS. 
# Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. # # If the first sed substitution is executed (which looks for macros that # take arguments), then branch to the quote section. Otherwise, # look for a macro that doesn't take arguments. ac_script=' :mline /\\$/{ N s,\\\n,, b mline } t clear :clear s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g t quote s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g t quote b any :quote s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g s/\[/\\&/g s/\]/\\&/g s/\$/$$/g H :any ${ g s/^\n// s/\n/ /g p } ' DEFS=`sed -n "$ac_script" confdefs.h` ac_libobjs= ac_ltlibobjs= U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 ## -------------------- ## ## M4sh Initialization. 
## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. 
Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. 
as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? 
-eq 1` } fi # as_fn_arith if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. 
as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in #( -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #(( ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 ## ----------------------------------- ## ## Main body of $CONFIG_STATUS script. ## ## ----------------------------------- ## _ASEOF test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Save the log message, to keep $0 and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" This file was extended by $as_me, which was generated by GNU Autoconf 2.68. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE Configuration files: $config_files Report bugs to the package provider." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ config.status configured by $0, generated by GNU Autoconf 2.68, with options \\"\$ac_cs_config\\" Copyright (C) 2010 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. 
ac_need_defaults=: while test $# != 0 do case $1 in --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; --*=) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg= ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --he | --h | --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." ;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. 
## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. for ac_config_target in $ac_config_targets do case $ac_config_target in "src/Makevars") CONFIG_FILES="$CONFIG_FILES src/Makevars" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= trap 'exit_status=$? : "${ac_tmp:=$tmp}" { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=`echo X | tr X '\015'` # On cygwin, bash can eat \r inside `` if the user requested igncr. # But we know of no other shell where ac_cr would be empty at this # point, so we can use a bashism as a fallback. 
if test "x$ac_cr" = x; then eval ac_cr=\$\'\\r\' fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed 
"s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 _ACEOF # VPATH may cause trouble with some makes, so we remove sole $(srcdir), # ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ h s/// s/^/:/ s/[ ]*$/:/ s/:\$(srcdir):/:/g s/:\${srcdir}:/:/g s/:@srcdir@:/:/g s/^:*// s/:*$// x s/\(=[ ]*\).*/\1/ G s/\n// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" eval set X " :F $CONFIG_FILES " shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. 
*/ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$ac_tmp/stdin" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir="$ac_dir"; as_fn_mkdir_p ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. 
ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? 
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" case $ac_file in -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac done # for ac_tag as_fn_exit 0 _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. 
So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi preprocessCore/configure.in0000644000126300012640000000343712127132757017462 0ustar00biocbuildphs_compbiodnl dnl Configuration things for affyR. dnl (http://www.cbs.dtu.dk/laurent/download/affyR/ dnl What is below (and in the other configuration fiels dnl was taken from different configuration scripts for R version 1.3.0. dnl dnl Acknowledgments: The author(s) of the R configure scripts, Kurt Hornik for the tip with autoconf. dnl dnl Laurent 2001 AC_INIT("DESCRIPTION") dnl dnl Are things (still) the same ? dnl (taken from the 'writing R extensions manual') dnl Now find the compiler and compiler flags to use : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then echo "could not determine R_HOME" exit 1 fi CC=`"${R_HOME}/bin/R" CMD config CC` CPP=`"${R_HOME}/bin/R" CMD config CPP` CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS` AC_PROG_CC AC_PROG_CPP # Checks for libraries. use_pthreads=no AC_SEARCH_LIBS([pthread_create], [pthread], [use_pthreads=yes]) # Checks for header files. 
AC_HEADER_STDC AC_CHECK_HEADERS([stdlib.h]) if test "x${have_pthreads}" = xyes; then AC_CHECK_HEADERS([pthread.h], [], [use_pthreads=no]) fi AC_MSG_CHECKING([if PTHREAD_STACK_MIN is defined]) AC_COMPILE_IFELSE([ #include #include int main () {size_t stacksize = PTHREAD_STACK_MIN + 0x4000; } ],[use_pthread_stack_min=yes], [use_pthread_stack_min=no]) AC_MSG_RESULT($use_pthread_stack_min) if test "x$use_pthread_stack_min" = xno; then use_pthreads=no fi if test "x${use_pthreads}" = "xno"; then echo "------------------------------------------" echo " Unable to find pthreads on this system. " echo " Building a single-threaded version. " echo "------------------------------------------" fi if test "x${use_pthreads}" = "xyes"; then AC_DEFINE(USE_PTHREADS, 1) fi AC_OUTPUT(src/Makevars) preprocessCore/inst/0000755000126300012640000000000012127132756016116 5ustar00biocbuildphs_compbiopreprocessCore/inst/include/0000755000126300012640000000000012127132757017542 5ustar00biocbuildphs_compbiopreprocessCore/inst/include/R_subColSummarize.h0000644000126300012640000000136512127132757023325 0ustar00biocbuildphs_compbio#ifndef R_SUBCOLSUMMARIZE_H #define R_SUBCOLSUMMARIZE_H SEXP R_subColSummarize_avg_log(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_log_avg(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_avg(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_biweight_log(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_biweight(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_median_log(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_log_median(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_median(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_medianpolish_log(SEXP RMatrix, SEXP R_rowIndexList); SEXP R_subColSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList); #endif preprocessCore/inst/include/R_subColSummarize_stubs.c0000644000126300012640000000537412127132757024544 
0ustar00biocbuildphs_compbio#include #include SEXP R_subColSummarize_avg_log(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_avg_log"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_log_avg(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_log_avg"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_avg(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_avg"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_biweight_log(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_biweight_log"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_biweight(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_biweight"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_median_log(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_median_log"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_log_median(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_log_median"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_median(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, 
SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_median"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_medianpolish_log(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_medianpolish_log"); return fun(RMatrix, R_rowIndexList); } SEXP R_subColSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList){ static SEXP(*fun)(SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP))R_GetCCallable("preprocessCore","R_subColSummarize_medianpolish"); return fun(RMatrix, R_rowIndexList); } preprocessCore/inst/include/avg.h0000644000126300012640000000752712127132757020503 0ustar00biocbuildphs_compbio/*! \file avg.h \brief Functions for column wise summarizing a matrix by using the mean */ #ifndef AVG_H #define AVG_H /*! \brief Compute the mean and SE of the mean for subset of rows * * Given a data matrix of probe intensities compute averageexpression measure and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the arithmetic mean * is computed for each column. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output averages. Should be of length cols * @param nprobes the number of elements in cur_rows * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols * * */ void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! 
\brief compute the mean for specified subset of rows * * Given a data matrix of probe intensities compute average expression measure and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the arithmetic mean * is computed for each column. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output averages. Should be of length cols * @param nprobes the number of elements in cur_rows * * * */ void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); /*! \brief Compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. This function guarantees that * no additional memory is temporarily allocated to copy the input data matrix. However, this means that * on output the input matrix may be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); /*! 
\brief Compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. On output the data matrix will * be unchanged. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols * * */ void colaverage(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/inst/include/avg_log.h0000644000126300012640000001005312127132757021330 0ustar00biocbuildphs_compbio/*! \file avg_log.h \brief Functions for column wise summarizing a matrix by \f$log2\f$ transforming the data and then averaging */ #ifndef AVG_LOG_H #define AVG_LOG_H /*! \brief log2 transform and then compute the mean and SE of the mean for subset of rows * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes the number of * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! \brief log2 transform and then compute the mean for subset of rows * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes the number of * * * */ void AverageLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); /*! \brief log2 transform and then compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. This function guarantees that * no additional memory is temporarily allocated to copy the input data matrix. However, this means that * on output the input matrix will be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void averagelog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief log2 transform and then compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. On output the data matrix will * be unchanged. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void averagelog(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/inst/include/biweight.h0000644000126300012640000001075212127132757021522 0ustar00biocbuildphs_compbio/*! \file biweight.h \brief Functions for column wise summarizing a matrix by \f$log2\f$ transforming the data and then using a 1-step Tukey Biweight */ #ifndef BIWEIGHT_H #define BIWEIGHT_H 1 /*! \brief log2 transform the data and then use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities compute average expression measure then log2 it and SE of this estimate * on a column by column basis * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void tukeybiweight(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities compute average expression measure then log2 it and SE of this estimate * on a column by column basis * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute log2 transformed 1-step Tukey Biweight expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void TukeyBiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! 
\brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, log2 transform each data item and then compute 1-step Tukey Biweight expression measure. * Note that data is a probes by chips matrix. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * * */ void TukeyBiweight_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); /*! \brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute 1-step Tukey Biweight expression measure. * Note that data is a probes by chips matrix. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * */ void TukeyBiweight_no_log_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); /*! \brief compute a 1-step Tukey Biweight * * * implements one step Tukey Biweight as documented in the Affymetrix * Statistical Algorithms Description Document. 
* * @param x vector of data * @param length length of vector of data * */ double Tukey_Biweight(double *x, int length); #endif preprocessCore/inst/include/lm.h0000644000126300012640000000154012127132757020323 0ustar00biocbuildphs_compbio/*! \file lm.h \brief Functions for weighted least squares regression */ #ifndef LM_H #define LM_H /*! \brief Weighted least squares regression * * This function computes weighted linear regression estimates using QR decomposition * * * @param x - Design matrix: dimension rows*cols * @param y - dependent variable: length rows * @param w - weights for each observation: length rows * @param rows - dimension of input * @param cols - dimension of input * @param tol - machine tolerance used in qr decomp * @param out_beta - place to output beta estimates: length cols * @param out_resids - place to output residuals: length rows * ************************************************************************/ void lm_wfit(double *x, double *y, double *w, int rows, int cols, double tol, double *out_beta, double *out_resids); #endif preprocessCore/inst/include/log_avg.h0000644000126300012640000000516712127132757021342 0ustar00biocbuildphs_compbio/*! \file log_avg.h \brief Functions for column wise summarizing a matrix by averaging and then \f$log2\f$ transforming the computed mean */ #ifndef LOG_AVG_H #define LOG_AVG_H /*! \brief compute the mean then log2 transform and also SE of the log2 mean * * Given a data matrix of probe intensities compute average expression measure then log2 it and SE of this estimate * on a column by column basis * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. 
Should be of length cols * * */ void logaverage(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief compute the average and then log2 transform it. * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute log2 average expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void LogAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! \brief compute the average and then log2 transform it. * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute log2 average expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * * */ void LogAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); #endif preprocessCore/inst/include/log_median.h0000644000126300012640000000765712127132757022030 0ustar00biocbuildphs_compbio/*! 
\file log_median.h \brief Functions for column wise summarizing a matrix by computing the median and then \f$\log_2\f$ transforming */ #ifndef LOG_MEDIAN_H #define LOG_MEDIAN_H 1 /*! \brief compute the median for subset of rows and the \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Also compute SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the median of each column is computed * the it is \f$\log_2\f$ transformed. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes the number of * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void LogMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! \brief compute the median for subset of rows and the \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Computed on a column by column basis using only a specified subset of rows. * Specifically, the median of each column is computed * the it is \f$\log_2\f$ transformed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes the number of rows in cur_rows * * */ void LogMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); /*! \brief compute the median for each column and then \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. * The sample standard error is also computed. On output the data matrix will * be unchanged. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void logmedian(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief compute the median for each column and then \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. * The sample standard error is also computed. On output the data matrix will * be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. 
Should be of length cols * * */ void logmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/inst/include/median.h0000644000126300012640000000720412127132757021153 0ustar00biocbuildphs_compbio/*! \file median.h \brief Functions for column wise summarizing a matrix by using the median */ #ifndef MEDIAN_H #define MEDIAN_H 1 /*! \brief Compute the median and SE of the median for subset of rows * * Given a data matrix of probe intensities compute median and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the arithmetic mean * is computed for each column. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output medians. Should be of length cols * @param nprobes the number of elements in cur_rows * @param resultsSE pre-allocated space to store SE of medians. Should be of length cols * * */ void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! \brief compute the median for specified subset of rows * * Given a data matrix of probe intensities compute median expression measure * on a column by column basis using only a specified subset of rows. Specifically, the median * is computed for each column. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output medians. 
Should be of length cols * @param nprobes the number of elements in cur_rows * * * */ void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); /*! \brief Compute the median and SE of the median * * Given a data matrix of probe intensities compute median measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. This function guarantees that * no additional memory is temporarily allocated to copy the input data matrix. However, this means that * on output the input matrix will be unchanged. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void colmedian(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief Compute the median and SE of the median * * Given a data matrix of probe intensities compute median measure and SE of this estimate * on a column by column basis. The sample standard error is also computed. On output the data matrix will * be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of averages. 
Should be of length cols * * */ void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/inst/include/median_log.h0000644000126300012640000000762512127132757022023 0ustar00biocbuildphs_compbio/*! \file median_log.h \brief Functions for column wise summarizing a matrix by computing the median of \f$\log_2\f$ transformed data */ #ifndef MEDIAN_LOG_H #define MEDIAN_LOG_H 1 /*! \brief \f$\log_2\f$ transform the data and compute the median * * Given a data matrix of probe intensities \f$\log_2\f$ transform it and then compute the median. Also compute SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the median of each column is based on * \f$\log_2\f$ transformed data. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param nprobes the number of rows in cur_rows * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols * * */ void MedianLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! \brief \f$\log_2\f$ transform the data and compute the median * * Given a data matrix of probe intensities \f$\log_2\f$ transform it and then compute the median on a column by column basis using only a specified subset of rows. * Specifically, the median of each column is based on \f$\log_2\f$ transformed data. 
* * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param nprobes the number of rows in cur_rows * */ void MedianLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); /*! \brief compute the median for each column of \f$\log_2\f$ transformed data. * * Given a data matrix of probe intensities \f$\log_2\f$ transform it then compute median of each column. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. * The sample standard error is also computed. On output the data matrix will * be unchanged. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols * * */ void medianlog(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief compute the median for each column of \f$\log_2\f$ transformed data. * * Given a data matrix of probe intensities \f$\log_2\f$ transform it then compute median of each column. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. * The sample standard error is also computed. On output the data matrix will * be changed. 
* * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols * * */ void medianlog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/inst/include/medianpolish.h0000644000126300012640000001474612127132757022403 0ustar00biocbuildphs_compbio/*! \file medianpolish.h \brief Functions for column wise summarizing a matrix by using the median polish algorithm */ #ifndef MEDIANPOLISH_H #define MEDIANPOLISH_H 1 /*! \brief Compute medianpolish * * * Given a data matrix, compute median polish parameter estimates * Note that data is a probes by chips matrix. Also compute SE estimates. * The input data matrix contains the fitted residuals on output. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param r pre-allocated space to store estimated row effects. Should be of length rows. Assumed on input that this is zero-ed out. * @param c pre-allocated space to store estimated column effects. Should be of length cols. Assumed on input that this is zero-ed out. * @param t pre-allocated space to store "intercept" effect. Should be of length 1. * */ void median_polish_fit_no_copy(double *data, int rows, int cols, double *r, double *c, double *t); /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. * The input data matrix contains the fitted residuals on output. 
* * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * */ void median_polish_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. This function * \f$\log_2\f$ transforms its input before apply the median polish. The input data matrix * contains the fitted residuals on output. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * */ void median_polish_log2_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. This function * \f$\log_2\f$ transforms its input before apply the median polish. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * @param residuals pre-allocated space to store the redsiuals. Should be of length rows*cols * */ void median_polish_log2(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals); /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * @param residuals pre-allocated space to store the redsiuals. Should be of length rows*cols * */ void median_polish(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals); /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. This function * \f$\log_2\f$ transforms its inpue before apply the median polish. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. 
Should be of length cols. Note that this is just NA values * * */ void MedianPolish(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * * */ void MedianPolish_no_log(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); #endif preprocessCore/inst/include/plmd.h0000644000126300012640000000063512127132757020653 0ustar00biocbuildphs_compbio#ifndef PLMD_H #define PLMD_H void plmd_fit(double *y, int y_rows, int y_cols, int ngroups, int *grouplabels, int *was_split, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k,int max_iter); double *plmd_get_design_matrix(int y_rows, int y_cols, int ngroups, int *grouplabels,int *was_split,int *X_rows,int *X_cols); #endif preprocessCore/inst/include/plmr.h0000644000126300012640000001553512127132757020676 0ustar00biocbuildphs_compbio#ifndef PLMR_H #define PLMR_H #include "psi_fns.h" /*! \brief robust linear regression fit row-colum model using PLM-r * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows and/or columns when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmr_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized); /*! \brief robust linear regression fit row-colum model using PLM-r * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows and/or columns when a significant number of probes are poorly performing * * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmr_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized); /*! \brief robust linear regression fit row-colum model using PLM-rr * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrr_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int ini tialized); /*! \brief robust linear regression fit row-colum model using PLM-rc * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrc_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int ini tialized); /*! \brief robust linear regression fit row-colum model using PLM-rr * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrr_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_ iter, int initialized); /*! \brief robust linear regression fit row-colum model using PLM-rc * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire columns when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrc_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_ iter, int initialized); #endif preprocessCore/inst/include/preprocessCore_background_stubs.c0000644000126300012640000000203012127132757026316 0ustar00biocbuildphs_compbio#include #include #ifndef PREPROCESSCORE_BACKGROUND_STUBS_H #define PREPROCESSCORE_BACKGROUND_STUBS_H 1 void rma_bg_parameters(double *PM,double *param, int rows, int cols, int column){ static void(*fun)(double *, double *, int, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, double *, int, int, int))R_GetCCallable("preprocessCore","rma_bg_parameters"); fun(PM, param, rows, cols, column); return; } void rma_bg_adjust(double *PM,double *param, int rows, int cols, int column){ static void(*fun)(double *, double *, int, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, double *, int, int, int))R_GetCCallable("preprocessCore","rma_bg_adjust"); fun(PM, param, rows, cols, column); return; } void rma_bg_correct(double *PM, int rows, int cols){ static void(*fun)(double *, int, int) = NULL; if (fun == NULL) fun = 
(void(*)(double *, int, int))R_GetCCallable("preprocessCore","rma_bg_correct"); fun(PM, rows, cols); return; } #endif preprocessCore/inst/include/preprocessCore_background_stubs.h0000644000126300012640000000032512127132757026330 0ustar00biocbuildphs_compbiovoid rma_bg_parameters(double *PM,double *param, int rows, int cols, int column); void rma_bg_adjust(double *PM,double *param, int rows, int cols, int column); void rma_bg_correct(double *PM, int rows, int cols); preprocessCore/inst/include/preprocessCore_normalization_stubs.c0000644000126300012640000000454312127132757027100 0ustar00biocbuildphs_compbio#include #include #ifndef PREPROCESSCORE_NORMALIZATION_STUBS_H #define PREPROCESSCORE_NORMALIZATION_STUBS_H 1 /*! \brief Quantile normalize the columns of a matrix * * * @param data a matrix to be quantile normalized. On exit will be normalized * @param rows number of rows in the matrix * @param cols number of columns in the matrix * */ int qnorm_c(double *data, int *rows, int *cols){ static int(*fun)(double*, int*, int*) = NULL; if (fun == NULL) fun = (int(*)(double*, int*, int*))R_GetCCallable("preprocessCore","qnorm_c"); return fun(data,rows,cols); } SEXP R_qnorm_robust_weights(SEXP X, SEXP remove_extreme, SEXP n_remove){ static SEXP(*fun)(SEXP, SEXP, SEXP) = NULL; if (fun == NULL) fun = (SEXP(*)(SEXP, SEXP, SEXP))R_GetCCallable("preprocessCore","R_qnorm_robust_weights"); return fun(X,remove_extreme,n_remove); } int qnorm_robust_c(double *data,double *weights, int *rows, int *cols, int *use_median, int *use_log2, int *weight_scheme){ static int(*fun)(double*, double *, int*, int*, int *, int *, int *) = NULL; if (fun == NULL) fun = (int(*)(double*, double *, int*, int*, int *, int *, int *))R_GetCCallable("preprocessCore","qnorm_robust_c"); return fun(data,weights,rows,cols,use_median,use_log2,weight_scheme); } int qnorm_c_using_target(double *data, int *rows, int *cols, double *target, int *targetrows){ static int(*fun)(double *, int *, int *, double *, int *) 
= NULL; if (fun == NULL) fun = (int(*)(double *, int *, int *, double *, int *))R_GetCCallable("preprocessCore","qnorm_c_using_target"); return fun(data,rows,cols,target,targetrows); } int qnorm_c_determine_target(double *data, int *rows, int *cols, double *target, int *targetrows){ static int(*fun)(double *, int *, int *, double *, int *) = NULL; if (fun == NULL) fun = (int(*)(double *, int *, int *, double *, int *))R_GetCCallable("preprocessCore","qnorm_c_determine_target"); return fun(data, rows, cols, target, targetrows); } int qnorm_c_within_blocks(double *x, int *rows, int *cols, int *blocks){ static int(*fun)(double *, int *, int *, int *) = NULL; if (fun == NULL) fun = (int(*)(double *, int *, int *, int *))R_GetCCallable("preprocessCore","qnorm_c_within_blocks"); return fun(x, rows, cols, blocks); } #endif preprocessCore/inst/include/preprocessCore_normalization_stubs.h0000644000126300012640000000067012127132757027102 0ustar00biocbuildphs_compbioint qnorm_c(double *data, int *rows, int *cols); int qnorm_robust_c(double *data,double *weights, int *rows, int *cols, int *use_median, int *use_log2, int *weight_scheme); int qnorm_c_using_target(double *data, int *rows, int *cols, double *target, int *targetrows); int qnorm_c_determine_target(double *data, int *rows, int *cols, double *target, int *targetrows); int qnorm_c_within_blocks(double *x, int *rows, int *cols, int *blocks); preprocessCore/inst/include/preprocessCore_summarization_stubs.c0000644000126300012640000017026312127132757027117 0ustar00biocbuildphs_compbio#include #include /*! \brief log2 transform and then compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. 
This function guarantees that * no additional memory is temporarily allocated to copy the input data matrix. However, this means that * on output the input matrix will be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void averagelog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double*, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, double *, double *))R_GetCCallable("preprocessCore","averagelog_no_copy"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief log2 transform and then compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void averagelog(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double*, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, double *, double *))R_GetCCallable("preprocessCore","averagelog"); fun(data,rows,cols,results,resultsSE); return; } /*! 
\brief log2 transform and then compute the mean and SE of the mean * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute average log2 expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double*, int, int, int*, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int, double *))R_GetCCallable("preprocessCore","AverageLog"); fun(data,rows,cols,cur_rows,results,nprobes,resultsSE); return; } /*! \brief log2 transform and then compute the mean * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute average log2 expression measure. * Note that data is a probes by chips matrix. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes number of probes in current set * * */ void AverageLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ static void(*fun)(double*, int, int, int*, double *, int) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int))R_GetCCallable("preprocessCore","AverageLog_noSE"); fun(data,rows,cols,cur_rows,results,nprobes); return; } /*! \brief compute the mean then log2 transform and also SE of the log2 mean * * Given a data matrix of probe intensities compute average expression measure then log2 it and SE of this estimate * on a column by column basis * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void logaverage(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double*, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, double *, double *))R_GetCCallable("preprocessCore","logaverage"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief compute the average and then log2 transform it. * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute log2 average expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void LogAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double*, int, int, int*, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int, double *))R_GetCCallable("preprocessCore","LogAverage"); fun(data, rows, cols, cur_rows, results, nprobes, resultsSE); return; } /*! \brief compute the average and then log2 transform it. * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute log2 average expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * * */ void LogAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ static void(*fun)(double*, int, int, int*, double *, int) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int))R_GetCCallable("preprocessCore","LogAverage_noSE"); fun(data, rows, cols, cur_rows, results, nprobes); return; } /*! 
\brief log2 transform the data and then use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities compute average expression measure then log2 it and SE of this estimate * on a column by column basis * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void tukeybiweight(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","tukeybiweight"); fun(data, rows, cols, results, resultsSE); return; } /*! \brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities compute average expression measure then log2 it and SE of this estimate * on a column by column basis * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","tukeybiweight_no_log"); fun(data, rows, cols, results, resultsSE); return; } /*! 
\brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute log2 transformed 1-step Tukey Biweight expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void TukeyBiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double *, int, int, int *, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int, double *))R_GetCCallable("preprocessCore","TukeyBiweight"); fun(data, rows, cols, cur_rows,results, nprobes, resultsSE); return; } /*! \brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, log2 transform each data item and then compute 1-step Tukey Biweight expression measure. * Note that data is a probes by chips matrix. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes number of probes in current set * * */ void TukeyBiweight_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ static void(*fun)(double *, int, int, int *, double *, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int))R_GetCCallable("preprocessCore","TukeyBiweight_noSE"); fun(data, rows, cols, cur_rows,results, nprobes); return; } /*! \brief Use a 1-step Tukey Biweight to summarize each column * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute 1-step Tukey Biweight expression measure. * Note that data is a probes by chips matrix. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * */ void TukeyBiweight_no_log_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ static void(*fun)(double *, int, int, int *, double *, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int))R_GetCCallable("preprocessCore","TukeyBiweight_no_log_noSE"); fun(data, rows, cols, cur_rows,results, nprobes); return; } /*! \brief compute a 1-step Tukey Biweight * * * implements one step Tukey Biweight as documented in the Affymetrix * Statistical Algorithms Description Document. 
* * @param x vector of data * @param length length of vector of data * */ double Tukey_Biweight(double *x, int length){ static double(*fun)(double *, int) = NULL; if (fun == NULL) fun = (double (*)(double *, int))R_GetCCallable("preprocessCore","Tukey_Biweight"); return fun(x,length); } void lm_wfit(double *x, double *y, double *w, int rows, int cols, double tol, double *out_beta, double *out_resids){ static void(*fun)(double *, double *, double *, int, int, double, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, double *, double *, int, int, double, double *, double *))R_GetCCallable("preprocessCore","lm_wfit"); fun(x,y,w,rows,cols,tol,out_beta,out_resids); return; } /*! \brief Compute medianpolish * * * Given a data matrix, compute median polish parameter estimates * Note that data is a probes by chips matrix. Also compute SE estimates. * The input data matrix contains the fitted residuals on output. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param r pre-allocated space to store estimated row effects. Should be of length rows. Assumed on input that this is zero-ed out. * @param c pre-allocated space to store estimated column effects. Should be of length cols. Assumed on input that this is zero-ed out. * @param t pre-allocated space to store "intercept" effect. Should be of length 1. * */ void median_polish_fit_no_copy(double *data, int rows, int cols, double *r, double *c, double *t){ static void(*fun)(double *, int, int, double *, double *, double*) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double*))R_GetCCallable("preprocessCore","median_polish_fit_no_copy"); fun(data, rows, cols, r, c, t); return; } /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. 
* Note that data is a probes by chips matrix. Also compute SE estimates. * The input data matrix contains the fitted residuals on output. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * */ void median_polish_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","median_polish_no_copy"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. This function * \f$\log_2\f$ transforms its input before apply the median polish. The input data matrix * contains the fitted residuals on output. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. 
Note that this is just NA values * */ void median_polish_log2_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","median_polish_log2_no_copy"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. This function * \f$\log_2\f$ transforms its input before apply the median polish. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * @param residuals pre-allocated space to store the redsiuals. Should be of length rows*cols * */ void median_polish_log2(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals){ static void(*fun)(double *, int, int, double *, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *))R_GetCCallable("preprocessCore","median_polish_log2"); fun(data,rows,cols,results,resultsSE,residuals); return; } /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. 
* * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * @param residuals pre-allocated space to store the redsiuals. Should be of length rows*cols * */ void median_polish(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals){ static void(*fun)(double *, int, int, double *, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *))R_GetCCallable("preprocessCore","median_polish"); fun(data,rows,cols,results,resultsSE,residuals); return; } /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates. This function * \f$\log_2\f$ transforms its inpue before apply the median polish. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. 
Note that this is just NA values * * */ void MedianPolish(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double *, int, int, int *, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int, double *))R_GetCCallable("preprocessCore","MedianPolish"); fun(data,rows,cols,cur_rows,results,nprobes,resultsSE); return; } /*! \brief Compute medianpolish * * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. 
Note that this is just NA values * * */ void MedianPolish_no_log(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double *, int, int, int *, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int, double *))R_GetCCallable("preprocessCore","MedianPolish_no_log"); fun(data,rows,cols,cur_rows,results,nprobes,resultsSE); return; } void rlm_fit(double *x, double *y, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized){ static void(*fun)(double *, double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","rlm_fit"); fun(x, y, rows, cols, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } void rlm_wfit(double *x, double *y, double *w, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized){ static void(*fun)(double *, double *, double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, double *, double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","rlm_wfit"); fun(x, y, w, rows, cols, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } double med_abs(double *x, int length){ static double(*fun)(double *, int) = NULL; if (fun == NULL) fun = (double(*)(double *, int))R_GetCCallable("preprocessCore","med_abs"); return fun(x, length); } void rlm_fit_anova(double *y, int y_rows, int y_cols,double 
*out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","rlm_fit_anova"); fun(y, y_rows, y_cols, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } void rlm_wfit_anova(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","rlm_wfit_anova"); fun(y, y_rows, y_cols, w, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } void rlm_compute_se(double *X,double *Y, int n, int p, double *beta, double *resids,double *weights,double *se_estimates,double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k){ static void(*fun)(double *,double *, int, int, double *, double *, double *, double *, double *, double *, int, double (*)(double, double, int), double) = NULL; if (fun == NULL) fun = (void(*)(double *,double *, int, int, double *, double *, double *, double *, double *, double *, int, double (*)(double, double, int), double))R_GetCCallable("preprocessCore","rlm_compute_se"); fun(X, Y, n, p, beta, resids, weights, se_estimates, varcov, residSE, method, PsiFn, psi_k); return; } void rlm_compute_se_anova(double *Y, int y_rows,int y_cols, double *beta, double *resids,double 
*weights,double *se_estimates, double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k){ static void(*fun)(double *, int, int, double *, double *,double *,double *, double *, double *, int, double (*)(double, double, int), double) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *,double *,double *, double *, double *, int, double (*)(double, double, int), double))R_GetCCallable("preprocessCore","rlm_compute_se_anova"); fun(Y, y_rows, y_cols, beta, resids, weights, se_estimates, varcov, residSE, method, PsiFn, psi_k); return; } /*! \brief \f$\log_2\f$ transform the data and compute the median * * Given a data matrix of probe intensities \f$\log_2\f$ transform it and then compute the median. Also compute SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the median of each column is based on * \f$\log_2\f$ transformed data. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param nprobes the number of rows in cur_rows * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols * * */ void MedianLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double *, int, int, int *, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int, double *))R_GetCCallable("preprocessCore","MedianLog"); fun(data, rows, cols, cur_rows, results, nprobes, resultsSE); return; } /*! 
\brief \f$\log_2\f$ transform the data and compute the median * * Given a data matrix of probe intensities \f$\log_2\f$ transform it and then compute the median on a column by column basis using only a specified subset of rows. * Specifically, the median of each column is based on \f$\log_2\f$ transformed data. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param nprobes the number of rows in cur_rows * */ void MedianLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ static void(*fun)(double *, int, int, int *, double *, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int))R_GetCCallable("preprocessCore","MedianLog_noSE"); fun(data, rows, cols, cur_rows, results, nprobes); return; } /*! \brief compute the median for each column of \f$\log_2\f$ transformed data. * * Given a data matrix of probe intensities \f$\log_2\f$ transform it then compute median of each column. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. * The sample standard error is also computed. On output the data matrix will * be unchanged. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 medians. 
Should be of length cols * * */ void medianlog(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","medianlog"); fun(data, rows, cols, results, resultsSE); return; } /*! \brief compute the median for each column of \f$\log_2\f$ transformed data. * * Given a data matrix of probe intensities \f$\log_2\f$ transform it then compute median of each column. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. * The sample standard error is also computed. On output the data matrix will * be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 medians. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols * * */ void medianlog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","medianlog_no_copy"); fun(data, rows, cols, results, resultsSE); return; } /*! \brief compute the median for subset of rows and the \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Also compute SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the median of each column is computed * the it is \f$\log_2\f$ transformed. The sample standard error is also computed. 
* * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes the number of rows in cur_rows * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void LogMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double *, int, int, int *, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int, double *))R_GetCCallable("preprocessCore","LogMedian"); fun(data, rows, cols, cur_rows, results, nprobes, resultsSE); return; } /*! \brief compute the median for subset of rows and the \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Computed on a column by column basis using only a specified subset of rows. * Specifically, the median of each column is computed * the it is \f$\log_2\f$ transformed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes the number of rows in cur_rows * * */ void LogMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double *, int, int, int *, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, int *, double *, int, double *))R_GetCCallable("preprocessCore","LogMedian_noSE"); fun(data, rows, cols, cur_rows, results, nprobes, resultsSE); return; } /*! \brief compute the median for each column and then \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. * The sample standard error is also computed. On output the data matrix will * be unchanged. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void logmedian(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","logmedian"); fun(data, rows, cols, results, resultsSE); return; } /*! \brief compute the median for each column and then \f$\log_2\f$ transform it * * Given a data matrix of probe intensities compute median and then \f$\log_2\f$ transform it. Also produce the SE of this estimate * on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed. 
* The sample standard error is also computed. On output the data matrix will * be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void logmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double *, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *))R_GetCCallable("preprocessCore","logmedian_no_copy"); fun(data, rows, cols, results, resultsSE); return; } /*! \brief Compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. This function guarantees that * no additional memory is temporarily allocated to copy the input data matrix. However, this means that * on output the input matrix may be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. 
Should be of length cols * * */ void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double*, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, double *, double *))R_GetCCallable("preprocessCore","colaverage_no_copy"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief Compute the mean and SE of the mean * * Given a data matrix of probe intensities compute average log2 expression measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. This function guarantees that * no additional memory is temporarily allocated to copy the input data matrix. However, this means that * on output the input matrix may be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void colaverage(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double*, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, double *, double *))R_GetCCallable("preprocessCore","colaverage"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief Compute the mean and SE of the mean for subset of rows * * Given a data matrix of probe intensities compute averageexpression measure and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the arithmetic mean * is computed for each column. The sample standard error is also computed. 
* * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output averages. Should be of length cols * @param nprobes the number of elements in cur_rows * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols * * */ void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double*, int, int, int*, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int, double *))R_GetCCallable("preprocessCore","ColAverage"); fun(data, rows, cols, cur_rows, results, nprobes, resultsSE); return; } /*! \brief compute the mean for specified subset of rows * * Given a data matrix of probe intensities compute average expression measure and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the arithmetic mean * is computed for each column. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output averages. 
Should be of length cols * @param nprobes the number of elements in cur_rows * * * */ void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ static void(*fun)(double*, int, int, int*, double *, int) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int))R_GetCCallable("preprocessCore","ColAverage_noSE"); fun(data, rows, cols, cur_rows, results, nprobes); return; } /*! \brief Compute the median and SE of the median * * Given a data matrix of probe intensities compute median measure and SE of this estimate * on a column by column basis. The sample standard error is also computed. On output the data matrix will * be changed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of averages. Should be of length cols * * */ void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double*, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, double *, double *))R_GetCCallable("preprocessCore","colmedian_no_copy"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief Compute the median and SE of the median * * Given a data matrix of probe intensities compute median measure and SE of this estimate * on a column by column basis. Specifically, each element is log2 transformed, then the arithmetic mean * is computed for each column. The sample standard error is also computed. This function guarantees that * no additional memory is temporarily allocated to copy the input data matrix. However, this means that * on output the input matrix will be unchanged. 
* * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols * * */ void colmedian(double *data, int rows, int cols, double *results, double *resultsSE){ static void(*fun)(double*, int, int, double *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, double *, double *))R_GetCCallable("preprocessCore","colmedian"); fun(data,rows,cols,results,resultsSE); return; } /*! \brief Compute the median and SE of the median for subset of rows * * Given a data matrix of probe intensities compute median and SE of this estimate * on a column by column basis using only a specified subset of rows. Specifically, the arithmetic mean * is computed for each column. The sample standard error is also computed. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output medians. Should be of length cols * @param nprobes the number of elements in cur_rows * @param resultsSE pre-allocated space to store SE of medians. Should be of length cols * * */ void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ static void(*fun)(double*, int, int, int*, double *, int, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int, double *))R_GetCCallable("preprocessCore","ColMedian"); fun(data, rows, cols, cur_rows, results, nprobes, resultsSE); return; } /*! 
\brief compute the median for specified subset of rows * * Given a data matrix of probe intensities compute median expression measure * on a column by column basis using only a specified subset of rows. Specifically, the median * is computed for each column. * * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows indices specifying which rows in the matrix to use * @param results pre-allocated space to store output medians. Should be of length cols * @param nprobes the number of elements in cur_rows * * * */ void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ static void(*fun)(double*, int, int, int*, double *, int) = NULL; if (fun == NULL) fun = (void(*)(double*, int, int, int*, double *, int))R_GetCCallable("preprocessCore","ColMedian_noSE"); fun(data, rows, cols, cur_rows, results, nprobes); return; } /*! \brief robust linear regression fit row-colum model using PLM-r * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows and/or columns when a siginficant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmr_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","plmr_fit"); fun(y, y_rows, y_cols, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } /*! \brief robust linear regression fit row-colum model using PLM-r * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows and/or columns when a siginficant number of probes are poorly performing * * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmr_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","plmr_wfit"); fun(y, y_rows, y_cols, w, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } /*! \brief robust linear regression fit row-colum model using PLM-rr * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrr_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","plmrr_fit"); fun(y, y_rows, y_cols, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } /*! \brief robust linear regression fit row-colum model using PLM-rc * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrc_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","plmrc_fit"); fun(y, y_rows, y_cols, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } /*! \brief robust linear regression fit row-colum model using PLM-rr * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire rows when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrr_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","plmrr_wfit"); fun(y, y_rows, y_cols, w, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } /*! \brief robust linear regression fit row-colum model using PLM-rc * * Fits the model y = cols + rows + errors with constraint sum rows = 0. 
PLM-r (Probe Level Model-robust) attempts * to dyamically downweight entire columns when a significant number of probes are poorly performing * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void plmrc_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","plmrc_wfit"); fun(y, y_rows, y_cols, w, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } /*! \brief apply row effects of a robust linear regression fit row-colum model * * Using pre-computed row effects from the model y = cols + rows + errors with constraint sum rows = 0 * apply on a single column by column basis to robustly estimate col effects for each column * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param probe_effects previously computed row effects with length y_rows. Assumed that it sums to 0. 
* @param out_beta place to output beta estimates: length y_cols * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_fit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double,double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","rlm_fit_anova_given_probe_effects"); fun(y, y_rows, y_cols, probe_effects, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } /*! \brief Estimate SE for robust linear regression fit using iteratively reweighted least squares * * Specialized to the model y = cols + rows + error model where the rows parameters are previously specified * designed to work independently for each column so can be applied "chip by chip" * * @param Y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param probe_effects previously computed row effects with length y_rows. Assumed that it sums to 0. 
* @param beta fitted parameter estimates: length y_rows + y_cols -1 * @param resids estimated residuals: length y_rows*y_cols * @param weights estimated weights: length y_rows*y_cols * @param se_estimates on output contains standard error estimates : y_rows + y_cols -1 * @param varcov a place to store estimated variance covariance matrix: dimension (y_rows + y_cols -1)*(y_rows + y_cols -1) (or set to NULL) * @param residSE estimated residual standard error * @param method should be integer 1,2,3 or 4 (4 is the default). Currently ignored. * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * */ void rlm_compute_se_anova_given_probe_effects(double *Y, int y_rows,int y_cols, double *probe_effects,double *beta, double *resids,double *weights,double *se_estimates, double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k){ static void(*fun)(double *, int, int, double *, double *, double *,double *,double *, double *, double *, int, double (*)(double, double, int), double) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *,double *, double *,double *,double *, double *, double *, int, double (*)(double, double, int), double))R_GetCCallable("preprocessCore","rlm_compute_se_anova_given_probe_effects"); fun(Y, y_rows, y_cols, probe_effects, beta, resids, weights, se_estimates, varcov, residSE, method, PsiFn, psi_k); return; } /*! \brief apply row effects of a robust linear regression fit row-colum model * * Using pre-computed row effects from the model y = cols + rows + errors with constraint sum rows = 0 * apply on a single column by column basis to robustly estimate col effects for each column * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param probe_effects previously computed row effects with length y_rows. Assumed that it sums to 0. 
* @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length y_cols * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_wfit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double,double, int), double psi_k,int max_iter, int initialized){ static void(*fun)(double *, int, int, double *, double *, double *, double *, double *, double (*)(double, double, int), double, int, int) = NULL; if (fun == NULL) fun = (void(*)(double *, int, int, double *, double *, double *, double *, double *, double (*)(double, double, int), double, int, int))R_GetCCallable("preprocessCore","rlm_wfit_anova_given_probe_effects"); fun(y, y_rows, y_cols, probe_effects, w, out_beta, out_resids,out_weights, PsiFn, psi_k, max_iter, initialized); return; } preprocessCore/inst/include/preprocessCore_summarization_stubs.h0000644000126300012640000001535612127132757027125 0ustar00biocbuildphs_compbiovoid averagelog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); void averagelog(double *data, int rows, int cols, double *results, double *resultsSE); void AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); void AverageLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); void logaverage(double *data, int rows, int cols, double *results, double *resultsSE); void LogAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, 
double *resultsSE);
void LogAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);

/* Tukey biweight summaries. */
void tukeybiweight(double *data, int rows, int cols, double *results, double *resultsSE);
void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE);
void TukeyBiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void TukeyBiweight_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
void TukeyBiweight_no_log_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);

/* Weighted least squares and median polish. */
void lm_wfit(double *x, double *y, double *w, int rows, int cols, double tol, double *out_beta, double *out_resids);
void median_polish_fit_no_copy(double *data, int rows, int cols, double *r, double *c, double *t);
void median_polish_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);
void median_polish_log2_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);
void median_polish_log2(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals);
void median_polish(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals);
void MedianPolish(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void MedianPolish_no_log(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);

/* Robust linear model fitting. */
void rlm_fit(double *x, double *y, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized);
void rlm_wfit(double *x, double *y, double *w, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized);
void rlm_fit_anova(double *y, int y_rows, int y_cols,double
*out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void rlm_wfit_anova(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);

/* Standard-error computation for robust fits. */
void rlm_compute_se(double *X,double *Y, int n, int p, double *beta, double *resids,double *weights,double *se_estimates,double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k);
void rlm_compute_se_anova(double *Y, int y_rows,int y_cols, double *beta, double *resids,double *weights,double *se_estimates, double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k);

/* Median-based summaries. */
void MedianLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void MedianLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
void medianlog(double *data, int rows, int cols, double *results, double *resultsSE);
void medianlog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);
void LogMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
/* NOTE(review): LogMedian_noSE keeps a resultsSE parameter despite its name
   — matches the corresponding stub definition. */
void LogMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void logmedian(double *data, int rows, int cols, double *results, double *resultsSE);
void logmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);

/* Column mean / median summaries. */
void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);
void colaverage(double *data, int rows, int cols, double *results, double *resultsSE);
void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
void
colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);
void colmedian(double *data, int rows, int cols, double *results, double *resultsSE);
void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);

/* PLM-r family fits. */
void plmr_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void plmr_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void plmrr_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void plmrc_fit(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void plmrr_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void plmrc_wfit(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);

/* Fits with pre-computed probe (row) effects. */
void rlm_fit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double,double, int), double psi_k,int max_iter, int initialized);
void rlm_compute_se_anova_given_probe_effects(double *Y, int y_rows,int y_cols, double *probe_effects,double *beta, double *resids,double *weights,double *se_estimates, double *varcov,
double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k); void rlm_wfit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double,double, int), double psi_k,int max_iter, int initialized); preprocessCore/inst/include/qnorm.h0000644000126300012640000000171412127132757021052 0ustar00biocbuildphs_compbio#ifndef QNORM_H #define QNORM_H 1 #include #include #include #include int qnorm_c(double *data, int *rows, int *cols); int qnorm_robust_c(double *data,double *weights, int *rows, int *cols, int *use_median, int *use_log2, int *weight_scheme); int qnorm_c_using_target(double *data, int *rows, int *cols, double *target, int *targetrows); int qnorm_c_determine_target(double *data, int *rows, int *cols, double *target, int *targetrows); int qnorm_c_within_blocks(double *x, int *rows, int *cols, int *blocks); SEXP R_qnorm_c(SEXP X, SEXP copy); SEXP R_qnorm_robust_weights(SEXP X, SEXP remove_extreme, SEXP n_remove); SEXP R_qnorm_robust_c(SEXP X, SEXP copy, SEXP R_weights, SEXP R_use_median, SEXP R_use_log2, SEXP R_weight_scheme); SEXP R_qnorm_determine_target(SEXP X, SEXP targetlength); SEXP R_qnorm_using_target(SEXP X, SEXP target,SEXP copy); SEXP R_qnorm_within_blocks(SEXP X,SEXP blocks,SEXP copy); #endif preprocessCore/inst/include/rlm.h0000644000126300012640000001506412127132757020513 0ustar00biocbuildphs_compbio/*! \file rlm.h \brief Functions for fitting robust linear models */ #ifndef RLM_H #define RLM_H 1 /*! 
\brief robust linear regression fit using iteratively reweighted least squares * * * @param x Design matrix: dimension rows*cols * @param y dependent variable: length rows * @param rows dimension of input * @param cols dimension of input * @param out_beta place to output beta estimates: length cols * @param out_resids place to output residuals: length rows * @param out_weights place to output weights: length rows * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_fit(double *x, double *y, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized); /*! \brief robust linear regression fit using iteratively reweighted least squares * * * @param x Design matrix: dimension rows*cols * @param y dependent variable: length rows * @param w weights for each observation: length rows * @param rows dimension of input * @param cols dimension of input * @param out_beta place to output beta estimates: length cols * @param out_resids place to output residuals: length rows * @param out_weights place to output weights: length rows * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_wfit(double *x, double *y, double *w, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized); double med_abs(double *x, int length); /* double irls_delta(double *old, double *new, int length); */ /*! 
\brief robust linear regression fit row-colum model * * Fits the model y = cols + rows + errors with constraint sum rows = 0 * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_fit_anova(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized); /*! \brief robust linear regression fit row-colum model * * Fits the model y = cols + rows + errors with constraint sum rows = 0 * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_wfit_anova(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized); /*! 
\brief apply row effects of a robust linear regression fit row-colum model * * Using pre-computed row effects from the model y = cols + rows + errors with constraint sum rows = 0 * apply on a single column by column basis to robustly estimate col effects for each column * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param probe_effects previously computed row effects with length y_rows. Assumed that it sums to 0. * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_fit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized); /*! \brief apply row effects of a robust linear regression fit row-colum model * * Using pre-computed row effects from the model y = cols + rows + errors with constraint sum rows = 0 * apply on a single column by column basis to robustly estimate col effects for each column * * @param y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param probe_effects previously computed row effects with length y_rows. Assumed that it sums to 0. 
* @param w weights for each observation: length y_rows*y_cols * @param out_beta place to output beta estimates: length (y_rows + y_cols -1) * @param out_resids place to output residuals: length y_rows*y_cols * @param out_weights place to output weights: length y_rows*y_cols * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * @param max_iter maximum number of iterations (if don't converge before) * @param initialized do we have initial estimates of beta * */ void rlm_wfit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized); #endif preprocessCore/inst/include/rlm_se.h0000644000126300012640000000750312127132757021201 0ustar00biocbuildphs_compbio/*! \file rlm_se.h \brief Functions for computing SE for fitted robust linear models */ #ifndef RLM_SE_H #define RLM_SE_H 1 /*! \brief Estimate SE for robust linear regression fit using iteratively reweighted least squares * * * @param X Design matrix: dimension n*p * @param Y dependent variable: length n * @param n dimension of input * @param p dimension of input * @param beta fitted parameter estimates: length p * @param resids estimated residuals: length n * @param weights estimated weights: length n * @param se_estimates on output contains standard error estimates : length p * @param varcov a place to store estimated variance covariance matrix: dimension p*p (or set to NULL) * @param residSE estimated residual standard error * @param method should be integer 1,2,3 or 4 (4 is the default). 
* @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * */ void rlm_compute_se(double *X,double *Y, int n, int p, double *beta, double *resids,double *weights,double *se_estimates,double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k); /*! \brief Estimate SE for robust linear regression fit using iteratively reweighted least squares * * Specialized to the model y = cols + rows + error model * * @param Y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param beta fitted parameter estimates: length y_rows + y_cols -1 * @param resids estimated residuals: length y_rows*y_cols * @param weights estimated weights: length y_rows*y_cols * @param se_estimates on output contains standard error estimates : y_rows + y_cols -1 * @param varcov a place to store estimated variance covariance matrix: dimension (y_rows + y_cols -1)*(y_rows + y_cols -1) (or set to NULL) * @param residSE estimated residual standard error * @param method should be integer 1,2,3 or 4 (4 is the default). * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * */ void rlm_compute_se_anova(double *Y, int y_rows,int y_cols, double *beta, double *resids,double *weights,double *se_estimates, double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k); /*! 
\brief Estimate SE for robust linear regression fit using iteratively reweighted least squares * * Specialized to the model y = cols + rows + error model where the rows parameters are previously specified * designed to work independently for each column so can be applied "chip by chip" * * @param Y dependent variable: length y_rows*y_cols * @param y_rows dimension of input * @param y_cols dimension of input * @param probe_effects previously computed row effects with length y_rows. Assumed that it sums to 0. * @param beta fitted parameter estimates: length y_rows + y_cols -1 * @param resids estimated residuals: length y_rows*y_cols * @param weights estimated weights: length y_rows*y_cols * @param se_estimates on output contains standard error estimates : y_rows + y_cols -1 * @param varcov a place to store estimated variance covariance matrix: dimension (y_rows + y_cols -1)*(y_rows + y_cols -1) (or set to NULL) * @param residSE estimated residual standard error * @param method should be integer 1,2,3 or 4 (4 is the default). Currently ignored. * @param PsiFn a function used to determine weights based on standardized residuals * @param psi_k a tuning parameter for the PsiFn * */ void rlm_compute_se_anova_given_probe_effects(double *Y, int y_rows,int y_cols, double *probe_effects,double *beta, double *resids,double *weights,double *se_estimates, double *varcov, double *residSE, int method,double (* PsiFn)(double, double, int), double psi_k); #endif preprocessCore/inst/include/rma_background4.h0000644000126300012640000000455612127132757022767 0ustar00biocbuildphs_compbio/*! \file rma_background4.h \brief Functions for computing and carrying out the RMA convolution background method. */ #ifndef RMA_BACKGROUND4_H #define RMA_BACKGROUND4_H #include #include #include #include /*! \brief Compute the parameters for the RMA background correction model * * * Given a data matrix, compute the RMA convolution model parameters. 
The * three parameters estimated are alpha, mu and sigma (and are returned in that order) * * * * @param PM a matrix containing data stored column-wise stored in rows*cols length of memory * @param param a vector of length 3 where parameters for specified column will be stored on output * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param column specifies which column to compute parameters for. Should be 0-(cols-1) * */ void rma_bg_parameters(double *PM,double *param, int rows, int cols, int column); /*! \brief Carryout the RMA background correction for a specified column. * * * Given a data matrix and the RMA convolution model parameters for a specified column * adjust that column using the RMA convolution model. Ie E(Signal | Observed, model parameters) * * * * @param PM a matrix containing data stored column-wise stored in rows*cols length of memory * @param param a vector of length 3 where parameters for specified column will be stored on output * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param column specifies which column to compute parameters for. Should be 0-(cols-1) * */ void rma_bg_adjust(double *PM, double *param, int rows, int cols, int column); /*! \brief Carryout the RMA background correction for each column of a matrix * * * Given a data matrix background adjust each column using the RMA convolution model. Ie E(Signal | Observed, model parameters) where the model is Observed = Signal + background, signal is assumed to be exponential * and background is normally distributed. 
* * * * @param PM a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * */ void rma_bg_correct(double *PM, int rows, int cols); SEXP R_rma_bg_correct(SEXP PMmat,SEXP copy); #endif preprocessCore/inst/include/weightedkerneldensity_stubs.c0000644000126300012640000000125612127132757025533 0ustar00biocbuildphs_compbio#include #include #ifndef WEIGHTEDKERNELDENSITY_STUBS_H #define WEIGHTEDKERNELDENSITY_STUBS_H 1 void KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj){ static void(*fun)(double*, int*, double*, double*, double*, int *, int *, int *, double *) = NULL; if (fun == NULL) fun = (void(*)(double*, int*, double*, double*, double*, int *, int *, int *, double *))R_GetCCallable("preprocessCore","KernelDensity"); fun(x, nxxx, weights, output, output_x, nout, kernel_fn, bandwidth_fn, bandwidth_adj); return; } #endif preprocessCore/inst/include/weightedkerneldensity_stubs.h0000644000126300012640000000024212127132757025532 0ustar00biocbuildphs_compbiovoid KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj); preprocessCore/man/0000755000126300012640000000000012127132756015714 5ustar00biocbuildphs_compbiopreprocessCore/man/colSummarize.Rd0000644000126300012640000000472712127132756020667 0ustar00biocbuildphs_compbio\name{colSumamrize} \alias{colSummarizeAvg} \alias{colSummarizeAvgLog} \alias{colSummarizeBiweight} \alias{colSummarizeBiweightLog} \alias{colSummarizeLogAvg} \alias{colSummarizeLogMedian} \alias{colSummarizeMedian} \alias{colSummarizeMedianLog} \alias{colSummarizeMedianpolish} \alias{colSummarizeMedianpolishLog} \title{Summarize the column of matrices} \description{Compute column wise summary values of a matrix. 
} \usage{ colSummarizeAvg(y) colSummarizeAvgLog(y) colSummarizeBiweight(y) colSummarizeBiweightLog(y) colSummarizeLogAvg(y) colSummarizeLogMedian(y) colSummarizeMedian(y) colSummarizeMedianLog(y) colSummarizeMedianpolish(y) colSummarizeMedianpolishLog(y) } \arguments{ \item{y}{A numeric matrix} } \value{ A list with following items: \item{Estimates}{Summary values for each column.} \item{StdErrors}{Standard error estimates.} } \details{This groups of functions summarize the columns of a given matrices. \itemize{ \item{\code{colSummarizeAvg}}{Take means in column-wise manner} \item{\code{colSummarizeAvgLog}}{\code{log2} transform the data and then take means in column-wise manner} \item{\code{colSummarizeBiweight}}{Summarize each column using a one step Tukey Biweight procedure} \item{\code{colSummarizeBiweightLog}}{\code{log2} transform the data and then summarize each column using a one step Tuke Biweight procedure} \item{\code{colSummarizeLogAvg}}{Compute the mean of each column and then \code{log2} transform it} \item{\code{colSummarizeLogMedian}}{Compute the median of each column and then \code{log2} transform it} \item{\code{colSummarizeMedian}}{Compute the median of each column} \item{\code{colSummarizeMedianLog}}{\code{log2} transform the data and then summarize each column using the median} \item{\code{colSummarizeMedianpolish}}{Use the median polish to summarize each column, by also using a row effect (not returned)} \item{\code{colSummarizeMedianpolishLog}}{\code{log2} transform the data and then use the median polish to summarize each column, by also using a row effect (not returned)} } } \examples{ y <- matrix(10+rnorm(100),20,5) colSummarizeAvg(y) colSummarizeAvgLog(y) colSummarizeBiweight(y) colSummarizeBiweightLog(y) colSummarizeLogAvg(y) colSummarizeLogMedian(y) colSummarizeMedian(y) colSummarizeMedianLog(y) colSummarizeMedianpolish(y) colSummarizeMedianpolishLog(y) } \author{B. M. 
Bolstad \email{bmb@bmbolstad.com}} \keyword{univar}preprocessCore/man/normalize.quantiles.Rd0000644000126300012640000000327512127132756022216 0ustar00biocbuildphs_compbio\name{normalize.quantiles} \alias{normalize.quantiles} \title{Quantile Normalization} \description{ Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities. } \usage{ normalize.quantiles(x,copy=TRUE) } \arguments{ \item{x}{A matrix of intensities where each column corresponds to a chip and each row is a probe.} \item{copy}{Make a copy of matrix before normalizing. Usually safer to work with a copy, but in certain situations not making a copy of the matrix, but instead normalizing it in place will be more memory friendly.} } \details{This method is based upon the concept of a quantile-quantile plot extended to n dimensions. No special allowances are made for outliers. If you make use of quantile normalization please cite Bolstad et al, Bioinformatics (2003). This functions will handle missing data (ie NA values), based on the assumption that the data is missing at random. Note that the current implementation optimizes for better memory usage at the cost of some additional run-time. } \value{ A normalized \code{matrix}. } \references{ Bolstad, B (2001) \emph{Probe Level Quantile Normalization of High Density Oligonucleotide Array Data}. Unpublished manuscript \url{http://bmbolstad.com/stuff/qnorm.pdf} Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) \emph{A Comparison of Normalization Methods for High Density Oligonucleotide Array Data Based on Bias and Variance.} Bioinformatics 19(2) ,pp 185-193. 
\url{http://bmbolstad.com/misc/normalize/normalize.html} } \author{Ben Bolstad, \email{bmbolstad.com}} \seealso{\code{\link{normalize.quantiles.robust}}} \keyword{manip} preprocessCore/man/normalize.quantiles.in.blocks.Rd0000644000126300012640000000431612127132756024074 0ustar00biocbuildphs_compbio\name{normalize.quantiles.in.blocks} \alias{normalize.quantiles.in.blocks} \title{Quantile Normalization carried out separately within blocks of rows} \description{ Using a normalization based upon quantiles this function normalizes the columns of a matrix such that different subsets of rows get normalized together. } \usage{ normalize.quantiles.in.blocks(x,blocks,copy=TRUE) } \arguments{ \item{x}{A matrix of intensities where each column corresponds to a chip and each row is a probe.} \item{copy}{Make a copy of matrix before normalizing. Usually safer to work with a copy} \item{blocks}{A vector giving block membership for each each row} } \details{This method is based upon the concept of a quantile-quantile plot extended to n dimensions. No special allowances are made for outliers. If you make use of quantile normalization either through \code{\link[affy]{rma}} or \code{\link[affy]{expresso}} please cite Bolstad et al, Bioinformatics (2003). } \value{ From \code{normalize.quantiles.use.target} a normalized \code{matrix}. } \references{ Bolstad, B (2001) \emph{Probe Level Quantile Normalization of High Density Oligonucleotide Array Data}. Unpublished manuscript \url{http://bmbolstad.com/stuff/qnorm.pdf} Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) \emph{A Comparison of Normalization Methods for High Density Oligonucleotide Array Data Based on Bias and Variance.} Bioinformatics 19(2) ,pp 185-193. 
\url{http://bmbolstad.com/misc/normalize/normalize.html} } \examples{ ### setup the data blocks <- c(rep(1,5),rep(2,5),rep(3,5)) par(mfrow=c(3,2)) x <- matrix(c(rexp(5,0.05),rnorm(5),rnorm(5,10))) boxplot(x ~ blocks) y <- matrix(c(-rexp(5,0.05),rnorm(5,10),rnorm(5))) boxplot(y ~ blocks) pre.norm <- cbind(x,y) ### the in.blocks version post.norm <- normalize.quantiles.in.blocks(pre.norm,blocks) boxplot(post.norm[,1] ~ blocks) boxplot(post.norm[,2] ~ blocks) ### the usual version post.norm <- normalize.quantiles(pre.norm) boxplot(post.norm[,1] ~ blocks) boxplot(post.norm[,2] ~ blocks) } \author{Ben Bolstad, \email{bmb@bmbolstad.com}} \seealso{\code{\link[affy]{normalize}}} \keyword{manip} preprocessCore/man/normalize.quantiles.robust.Rd0000644000126300012640000000406212127132756023526 0ustar00biocbuildphs_compbio\name{normalize.quantiles.robust} \alias{normalize.quantiles.robust} \alias{normalize.AffyBatch.quantiles.robust} \title{Robust Quantile Normalization} \description{Using a normalization based upon quantiles, this function normalizes a matrix of probe level intensities. Allows weighting of chips} \usage{ normalize.quantiles.robust(x,copy=TRUE,weights=NULL, remove.extreme=c("variance","mean","both","none"), n.remove=1,use.median=FALSE,use.log2=FALSE) } \arguments{ \item{x}{A matrix of intensities, columns are chips, rows are probes} \item{copy}{Make a copy of matrix before normalizing. Usually safer to work with a copy} \item{weights}{A vector of weights, one for each chip} \item{remove.extreme}{If weights is null, then this will be used for determining which chips to remove from the calculation of the normalization distribution, See details for more info} \item{n.remove}{number of chips to remove} \item{use.median}{if TRUE use the median to compute normalization chip, otherwise uses a weighted mean} \item{use.log2}{work on log2 scale. 
This means we will be using the geometric mean rather than ordinary mean} } \details{This method is based upon the concept of a quantile-quantile plot extended to n dimensions. Note that the matrix is of intensities not log intensities. The function performs better with raw intensities. Choosing \bold{variance} will remove chips with variances much higher or lower than the other chips, \bold{mean} removes chips with the mean most different from all the other means, \bold{both} removes first extreme variance and then an extreme mean. The option \bold{none} does not remove any chips, but will assign equal weights to all chips. Note that this function does not handle missing values (ie NA). Unexpected results might occur in this situation. } \note{This function is still experimental.} \value{a matrix of normalized intensites} \author{Ben Bolstad, \email{bmb@bmbolstad.com}} \seealso{\code{\link{normalize.quantiles}}} \keyword{manip} preprocessCore/man/normalize.quantiles.target.Rd0000644000126300012640000000430412127132756023475 0ustar00biocbuildphs_compbio\name{normalize.quantiles.target} \alias{normalize.quantiles.use.target} \alias{normalize.quantiles.determine.target} \title{Quantile Normalization using a specified target distribution vector} \description{ Using a normalization based upon quantiles, these function normalizes the columns of a matrix based upon a specified normalization distribution } \usage{ normalize.quantiles.use.target(x,target,copy=TRUE,subset=NULL) normalize.quantiles.determine.target(x,target.length=NULL,subset=NULL) } \arguments{ \item{x}{A matrix of intensities where each column corresponds to a chip and each row is a probe.} \item{copy}{Make a copy of matrix before normalizing. Usually safer to work with a copy} \item{target}{A vector containing datapoints from the distribution to be normalized to} \item{target.length}{number of datapoints to return in target distribution vector. 
If \code{NULL} then this will be taken to be equal to the number of rows in the matrix.} \item{subset}{A logical variable indexing whether corresponding row should be used in reference distribution determination} } \details{This method is based upon the concept of a quantile-quantile plot extended to n dimensions. No special allowances are made for outliers. If you make use of quantile normalization either through \code{\link[affy]{rma}} or \code{\link[affy]{expresso}} please cite Bolstad et al, Bioinformatics (2003). These functions will handle missing data (ie NA values), based on the assumption that the data is missing at random. } \value{ From \code{normalize.quantiles.use.target} a normalized \code{matrix}. } \references{ Bolstad, B (2001) \emph{Probe Level Quantile Normalization of High Density Oligonucleotide Array Data}. Unpublished manuscript \url{http://bmbolstad.com/stuff/qnorm.pdf} Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) \emph{A Comparison of Normalization Methods for High Density Oligonucleotide Array Data Based on Bias and Variance.} Bioinformatics 19(2) ,pp 185-193. \url{http://bmbolstad.com/misc/normalize/normalize.html} } \author{Ben Bolstad, \email{bmb@bmbolstad.com}} \seealso{\code{\link[affy]{normalize}}} \keyword{manip} preprocessCore/man/rcModelPLMd.Rd0000644000126300012640000000447212127132756020314 0ustar00biocbuildphs_compbio\name{rcModelPLMd} \alias{rcModelPLMd} \title{Fit robust row-column models to a matrix} \description{These functions fit row-column effect models to matrices using PLM-d} \usage{ rcModelPLMd(y,group.labels) } \arguments{ \item{y}{A numeric matrix} \item{group.labels}{A vector of group labels. Of length \code{ncol(y)}} } \value{ A list with following items: \item{Estimates}{The parameter estimates. Stored in column effect then row effect order} \item{Weights}{The final weights used} \item{Residuals}{The residuals} \item{StdErrors}{Standard error estimates. 
Stored in column effect then row effect order} \item{WasSplit}{An indicator variable indicating whether or not a row was split with separate row effects for each group} } \details{ This functions first tries to fit row-column models to the specified input matrix. Specifically the model \deqn{y_{ij} = r_i + c_j + \epsilon_{ij}}{y_ij = r_i + c_j + e_ij} with \eqn{r_i} and \eqn{c_j} as row and column effects respectively. Note that these functions treat the row effect as the parameter to be constrained using sum to zero. Next the residuals for each row are compared to the group variable. In cases where there appears to be a significant relationship, the row-effect is "split" and separate row-effect parameters, one for each group, replace the single row effect. } \seealso{ } \examples{ col.effects <- c(10,11,10.5,12,9.5) row.effects <- c(seq(-0.5,-0.1,by=0.1),seq(0.1,0.5,by=0.1)) y <- outer(row.effects, col.effects,"+") y <- y + rnorm(50,sd=0.1) rcModelPLMd(y,group.labels=c(1,1,2,2,2)) row.effects <- c(4,3,2,1,-1,-2,-3,-4) col.effects <- c(8,9,10,11,12,10) y <- outer(row.effects, col.effects,"+") + rnorm(48,0,0.25) y[8,4:6] <- c(11,12,10)+ 2.5 + rnorm(3,0,0.25) y[5,4:6] <- c(11,12,10)+-2.5 + rnorm(3,0,0.25) rcModelPLMd(y,group.labels=c(1,1,1,2,2,2)) par(mfrow=c(2,2)) matplot(y,type="l",col=c(rep("red",3),rep("blue",3)),ylab="residuals",xlab="probe",main="Observed Data") matplot(rcModelPLM(y)$Residuals,col=c(rep("red",3),rep("blue",3)),ylab="residuals",xlab="probe",main="Residuals (PLM)") matplot(rcModelPLMd(y,group.labels=c(1,1,1,2,2,2))$Residuals,col=c(rep("red",3),rep("blue",3)),xlab="probe",ylab="residuals",main="Residuals (PLM-d)") } \author{B. M. 
Bolstad \email{bmb@bmbolstad.com}} \keyword{models}preprocessCore/man/rcModelPLMr.Rd0000644000126300012640000000730112127132756020324 0ustar00biocbuildphs_compbio\name{rcModelPLMr} \alias{rcModelPLMr} \alias{rcModelPLMrr} \alias{rcModelPLMrc} \alias{rcModelWPLMr} \alias{rcModelWPLMrr} \alias{rcModelWPLMrc} \title{Fit robust row-column models to a matrix} \description{These functions fit row-column effect models to matrices using PLM-r and variants } \usage{ rcModelPLMr(y) rcModelPLMrr(y) rcModelPLMrc(y) rcModelWPLMr(y, w) rcModelWPLMrr(y, w) rcModelWPLMrc(y, w) } \arguments{ \item{y}{A numeric matrix} \item{w}{A matrix or vector of weights. These should be non-negative.} } \value{ A list with following items: \item{Estimates}{The parameter estimates. Stored in column effect then row effect order} \item{Weights}{The final weights used} \item{Residuals}{The residuals} \item{StdErrors}{Standard error estimates. Stored in column effect then row effect order} } \details{ These functions fit row-column models to the specified input matrix. Specifically the model \deqn{y_{ij} = r_i + c_j + \epsilon_{ij}}{y_ij = r_i + c_j + e_ij} with \eqn{r_i} and \eqn{c_j} as row and column effects respectively. Note that these functions treat the row effect as the parameter to be constrained using sum to zero. The \code{rcModelPLMr} and \code{rcModelWPLMr} functions use the PLM-r fitting procedure. This adds column and row robustness to single element robustness. The \code{rcModelPLMrc} and \code{rcModelWPLMrc} functions use the PLM-rc fitting procedure. This adds column robustness to single element robustness. The \code{rcModelPLMrr} and \code{rcModelWPLMrr} functions use the PLM-rr fitting procedure. This adds row robustness to single element robustness. 
} \seealso{ } \examples{ col.effects <- c(10,11,10.5,12,9.5) row.effects <- c(seq(-0.5,-0.1,by=0.1),seq(0.1,0.5,by=0.1)) y <- outer(row.effects, col.effects,"+") w <- runif(50) rcModelPLMr(y) rcModelWPLMr(y, w) ### An example where there no or only occasional outliers y <- y + rnorm(50,sd=0.1) par(mfrow=c(2,2)) image(1:10,1:5,rcModelPLMr(y)$Weights,xlab="row",ylab="col",main="PLM-r",zlim=c(0,1)) image(1:10,1:5,rcModelPLMrc(y)$Weights,xlab="row",ylab="col",main="PLM-rc",zlim=c(0,1)) image(1:10,1:5,rcModelPLMrr(y)$Weights,xlab="row",ylab="col",main="PLM-rr",zlim=c(0,1)) matplot(y,type="l") ### An example where there is a row outlier y <- outer(row.effects, col.effects,"+") y[1,] <- 11+ rnorm(5) y <- y + rnorm(50,sd=0.1) par(mfrow=c(2,2)) image(1:10,1:5,rcModelPLMr(y)$Weights,xlab="row",ylab="col",main="PLM-r",zlim=c(0,1)) image(1:10,1:5,rcModelPLMrc(y)$Weights,xlab="row",ylab="col",main="PLM-rc",zlim=c(0,1)) image(1:10,1:5,rcModelPLMrr(y)$Weights,xlab="row",ylab="col",main="PLM-rr",zlim=c(0,1)) matplot(y,type="l") ### An example where there is a column outlier y <- outer(row.effects, col.effects,"+") w <- rep(1,50) y[,4] <- 12 + rnorm(10) y <- y + rnorm(50,sd=0.1) par(mfrow=c(2,2)) image(1:10,1:5,rcModelWPLMr(y,w)$Weights,xlab="row",ylab="col",main="PLM-r",zlim=c(0,1)) image(1:10,1:5,rcModelWPLMrc(y,w)$Weights,xlab="row",ylab="col",main="PLM-rc",zlim=c(0,1)) image(1:10,1:5,rcModelWPLMrr(y,w)$Weights,xlab="row",ylab="col",main="PLM-rr",zlim=c(0,1)) matplot(y,type="l") ### An example where there is both column and row outliers y <- outer(row.effects, col.effects,"+") w <- rep(1,50) y[,4] <- 12 + rnorm(10) y[1,] <- 11+ rnorm(5) y <- y + rnorm(50,sd=0.1) par(mfrow=c(2,2)) image(1:10,1:5,rcModelWPLMr(y,w)$Weights,xlab="row",ylab="col",main="PLM-r",zlim=c(0,1)) image(1:10,1:5,rcModelWPLMrc(y,w)$Weights,xlab="row",ylab="col",main="PLM-rc",zlim=c(0,1)) image(1:10,1:5,rcModelWPLMrr(y,w)$Weights,xlab="row",ylab="col",main="PLM-rr",zlim=c(0,1)) matplot(y,type="l") } \author{B. 
M. Bolstad \email{bmb@bmbolstad.com}} \keyword{models}preprocessCore/man/rcModels.Rd0000644000126300012640000000444712127132756017764 0ustar00biocbuildphs_compbio\name{rcModels} \alias{rcModelPLM} \alias{rcModelWPLM} \alias{rcModelMedianPolish} \title{Fit row-column model to a matrix} \description{These functions fit row-column effect models to matrices } \usage{ rcModelPLM(y,row.effects=NULL,input.scale=NULL) rcModelWPLM(y, w,row.effects=NULL,input.scale=NULL) rcModelMedianPolish(y) } \arguments{ \item{y}{A numeric matrix} \item{w}{A matrix or vector of weights. These should be non-negative.} \item{row.effects}{If these are supplied then the fitting procedure uses these (and analyzes individual columns separately)} \item{input.scale}{If supplied will be used rather than estimating the scale from the data} } \value{ A list with following items: \item{Estimates}{The parameter estimates. Stored in column effect then row effect order} \item{Weights}{The final weights used} \item{Residuals}{The residuals} \item{StdErrors}{Standard error estimates. Stored in column effect then row effect order} \item{Scale}{Scale Estimates} } \details{ These functions fit row-column models to the specified input matrix. Specifically the model \deqn{y_{ij} = r_i + c_j + \epsilon_{ij}}{y_ij = r_i + c_j + e_ij} with \eqn{r_i} and \eqn{c_j} as row and column effects respectively. Note that this functions treat the row effect as the parameter to be constrained using sum to zero (for \code{rcModelPLM} and \code{rcModelWPLM}) or median of zero (for \code{rcModelMedianPolish}). The \code{rcModelPLM} and \code{rcModelWPLM} functions use a robust linear model procedure for fitting the model. The function \code{rcModelMedianPolish} uses the median polish algorithm. 
} \seealso{ } \examples{ col.effects <- c(10,11,10.5,12,9.5) row.effects <- c(seq(-0.5,-0.1,by=0.1),seq(0.1,0.5,by=0.1)) y <- outer(row.effects, col.effects,"+") w <- runif(50) rcModelPLM(y) rcModelWPLM(y, w) rcModelMedianPolish(y) y <- y + rnorm(50) rcModelPLM(y) rcModelWPLM(y, w) rcModelMedianPolish(y) rcModelPLM(y,row.effects=row.effects) rcModelWPLM(y,w,row.effects=row.effects) rcModelPLM(y,input.scale=1.0) rcModelWPLM(y, w,input.scale=1.0) rcModelPLM(y,row.effects=row.effects,input.scale=1.0) rcModelWPLM(y,w,row.effects=row.effects,input.scale=1.0) } \author{B. M. Bolstad \email{bmb@bmbolstad.com}} \keyword{models}preprocessCore/man/rma.background.correct.Rd0000644000126300012640000000213712127132756022543 0ustar00biocbuildphs_compbio\name{rma.background.correct} \alias{rma.background.correct} \title{RMA Background Correction} \description{Background correct each column of a matrix } \usage{ rma.background.correct(x,copy=TRUE) } \arguments{ \item{x}{A matrix of intensities where each column corresponds to a chip and each row is a probe.} \item{copy}{Make a copy of matrix before background correctiong. Usually safer to work with a copy, but in certain situations not making a copy of the matrix, but instead background correcting it in place will be more memory friendly.} } \details{ Assumes PMs are a convolution of normal and exponentional. So we observe X+Y where X is backround and Y is signal. \code{bg.adjust} returns E[Y|X+Y, Y>0] as our backround corrected PM. } \value{ A RMA background corrected \code{matrix}. } \references{ Bolstad, BM (2004) \emph{Low Level Analysis of High-density Oligonucleotide Array Data: Background, Normalization and Summarization}. PhD Dissertation. University of California, Berkeley. 
pp 17-21 } \author{Ben Bolstad, \email{bmbolstad.com}} \keyword{manip} preprocessCore/man/subColSummarize.Rd0000644000126300012640000000667212127132756021342 0ustar00biocbuildphs_compbio\name{subColSummarize} \alias{subColSummarizeAvg} \alias{subColSummarizeAvgLog} \alias{subColSummarizeBiweight} \alias{subColSummarizeBiweightLog} \alias{subColSummarizeLogAvg} \alias{subColSummarizeLogMedian} \alias{subColSummarizeMedian} \alias{subColSummarizeMedianLog} \alias{subColSummarizeMedianpolish} \alias{subColSummarizeMedianpolishLog} \alias{convert.group.labels} \title{Summarize columns when divided into groups of rows} \description{These functions summarize columns of a matrix when the rows of the matrix are classified into different groups } \usage{subColSummarizeAvg(y, group.labels) subColSummarizeAvgLog(y, group.labels) subColSummarizeBiweight(y, group.labels) subColSummarizeBiweightLog(y, group.labels) subColSummarizeLogAvg(y, group.labels) subColSummarizeLogMedian(y, group.labels) subColSummarizeMedian(y, group.labels) subColSummarizeMedianLog(y, group.labels) subColSummarizeMedianpolish(y, group.labels) subColSummarizeMedianpolishLog(y, group.labels) convert.group.labels(group.labels) } \arguments{ \item{y}{A numeric \code{\link{matrix}}} \item{group.labels}{A vector to be treated as a factor variable. This is used to assign each row to a group. NA values should be used to exclude rows from consideration} } \value{ A \code{\link{matrix}} containing column summarized data. Each row corresponds to data column summarized over a group of rows. } \details{ These functions are designed to summarize the columns of a matrix where the rows of the matrix are assigned to groups. The summarization is by column across all rows in each group. 
\itemize{ \item{subColSummarizeAvg}{Summarize by taking mean} \item{subColSummarizeAvgLog}{\code{log2} transform the data and then take means in column-wise manner} \item{subColSummarizeBiweight}{Use a one-step Tukey Biweight to summarize columns} \item{subColSummarizeBiweightLog}{\code{log2} transform the data and then use a one-step Tukey Biweight to summarize columns} \item{subColSummarizeLogAvg}{Summarize by taking mean and then taking \code{log2}} \item{subColSummarizeLogMedian}{Summarize by taking median and then taking \code{log2}} \item{subColSummarizeMedian}{Summarize by taking median} \item{subColSummarizeMedianLog}{\code{log2} transform the data and then summarize by taking median} \item{subColSummarizeMedianpolish}{Use the median polish to summarize each column, by also using a row effect (not returned)} \item{subColSummarizeMedianpolishLog}{\code{log2} transform the data and then use the median polish to summarize each column, by also using a row effect (not returned)} } } \examples{ ### Assign the first 10 rows to one group and ### the second 10 rows to the second group ### y <- matrix(c(10+rnorm(50),20+rnorm(50)),20,5,byrow=TRUE) subColSummarizeAvgLog(y,c(rep(1,10),rep(2,10))) subColSummarizeLogAvg(y,c(rep(1,10),rep(2,10))) subColSummarizeAvg(y,c(rep(1,10),rep(2,10))) subColSummarizeBiweight(y,c(rep(1,10),rep(2,10))) subColSummarizeBiweightLog(y,c(rep(1,10),rep(2,10))) subColSummarizeMedianLog(y,c(rep(1,10),rep(2,10))) subColSummarizeLogMedian(y,c(rep(1,10),rep(2,10))) subColSummarizeMedian(y,c(rep(1,10),rep(2,10))) subColSummarizeMedianpolishLog(y,c(rep(1,10),rep(2,10))) subColSummarizeMedianpolish(y,c(rep(1,10),rep(2,10))) } \author{B. M. 
Bolstad } \keyword{univar}preprocessCore/man/subrcModels.Rd0000644000126300012640000000525112127132756020470 0ustar00biocbuildphs_compbio\name{subrcModels} \alias{subrcModelPLM} \alias{subrcModelWPLM} \alias{subrcModelMedianPolish} \title{Fit row-column model to a matrix} \description{These functions fit row-column effect models to matrices } \usage{ subrcModelPLM(y, group.labels,row.effects=NULL,input.scale=NULL) %subrcModelWPLM(y, w,row.effects=NULL,input.scale=NULL) subrcModelMedianPolish(y, group.labels) } \arguments{ \item{y}{A numeric matrix} \item{group.labels}{A vector to be treated as a factor variable. This is used to assign each row to a group. NA values should be used to exclude rows from consideration} % \item{w}{A matrix or vector of weights. These should be non-negative.} \item{row.effects}{If these are supplied then the fitting procedure uses these (and analyzes individual columns separately)} \item{input.scale}{If supplied will be used rather than estimating the scale from the data} } \value{ A list with following items: \item{Estimates}{The parameter estimates. Stored in column effect then row effect order} \item{Weights}{The final weights used} \item{Residuals}{The residuals} \item{StdErrors}{Standard error estimates. Stored in column effect then row effect order} \item{Scale}{Scale Estimates} } \details{ These functions fit row-column models to the specified input matrix. Specifically the model \deqn{y_{ij} = r_i + c_j + \epsilon_{ij}}{y_ij = r_i + c_j + e_ij} with \eqn{r_i} and \eqn{c_j} as row and column effects respectively. Note that this functions treat the row effect as the parameter to be constrained using sum to zero (for \code{rcModelPLM} and \code{rcModelWPLM}) or median of zero (for \code{rcModelMedianPolish}). The \code{rcModelPLM} and \code{rcModelWPLM} functions use a robust linear model procedure for fitting the model. The function \code{rcModelMedianPolish} uses the median polish algorithm. 
} \seealso{ } \examples{ y <- matrix(c(10+rnorm(50),20+rnorm(50)),20,5,byrow=TRUE) subrcModelPLM(y,c(rep(1,10),rep(2,10))) subrcModelMedianPolish(y,c(rep(1,10),rep(2,10))) col.effects <- c(10,11,10.5,12,9.5) row.effects <- c(seq(-0.5,-0.1,by=0.1),seq(0.1,0.5,by=0.1)) y <- outer(row.effects, col.effects,"+") w <- runif(50) rcModelPLM(y) rcModelWPLM(y, w) rcModelMedianPolish(y) y <- y + rnorm(50) rcModelPLM(y) rcModelWPLM(y, w) rcModelMedianPolish(y) rcModelPLM(y,row.effects=row.effects) rcModelWPLM(y,w,row.effects=row.effects) rcModelPLM(y,input.scale=1.0) rcModelWPLM(y, w,input.scale=1.0) rcModelPLM(y,row.effects=row.effects,input.scale=1.0) rcModelWPLM(y,w,row.effects=row.effects,input.scale=1.0) } \author{B. M. Bolstad \email{bmb@bmbolstad.com}} \keyword{models} preprocessCore/src/0000755000126300012640000000000012127132755015727 5ustar00biocbuildphs_compbiopreprocessCore/src/Makevars0000644000126300012640000000006212127132755017421 0ustar00biocbuildphs_compbio PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) preprocessCore/src/Makevars.in0000644000126300012640000000015712127132755020033 0ustar00biocbuildphs_compbioPKG_CFLAGS = @CFLAGS@ @DEFS@ PKG_LIBS = @LIBS@ $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) PKG_CPPFLAGS = @CPPFLAGS@ preprocessCore/src/R_colSummarize.c0000644000126300012640000002747012127220006021023 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: R_colSummarize.c ** ** Aim: Code which provides .Call() interfaces to the column ** summarization code. ** ** Copyright (C) 2007 Ben Bolstad ** ** created by: B. M. 
Bolstad ** ** created on: Sep 15, 2007 ** ** History ** Sep 15, 2007 - Initial version ** Jan 15, 2009 - Fix issues with VECTOR_ELT/STRING_ELT ** ** *********************************************************************/ #include #include #include #include #include "R_colSummarize.h" #include "avg_log.h" #include "log_avg.h" #include "avg.h" #include "log_median.h" #include "median_log.h" #include "median.h" #include "biweight.h" #include "medianpolish.h" SEXP R_colSummarize_avg_log(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); averagelog(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_log_avg(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); 
SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); logaverage(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_avg(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); colaverage(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_log_median(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = 
allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); logmedian(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_median_log(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); medianlog(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_median(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = 
allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); colmedian(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_biweight_log(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); tukeybiweight(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_biweight(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = 
allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); tukeybiweight_no_log(matrix, rows, cols, results, resultsSE); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_medianpolish_log(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *resultsSE; double *resids; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); resids = Calloc(rows*cols, double); memcpy(resids, matrix, rows*cols*sizeof(double)); median_polish_log2_no_copy(resids, rows, cols, results, resultsSE); Free(resids); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } SEXP R_colSummarize_medianpolish(SEXP RMatrix){ SEXP R_return_value; SEXP R_return_value_names; SEXP R_summaries; SEXP R_summaries_se; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); 
double *results, *resultsSE; double *resids; int rows, cols; PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,2)); PROTECT(R_summaries = allocVector(REALSXP,cols)); PROTECT(R_summaries_se = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_summaries); SET_VECTOR_ELT(R_return_value,1,R_summaries_se); results = NUMERIC_POINTER(R_summaries); resultsSE = NUMERIC_POINTER(R_summaries_se); resids = Calloc(rows*cols, double); memcpy(resids, matrix, rows*cols*sizeof(double)); median_polish_no_copy(resids, rows, cols, results, resultsSE); Free(resids); PROTECT(R_return_value_names= allocVector(STRSXP,2)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(1); UNPROTECT(3); return R_return_value; } preprocessCore/src/R_colSummarize.h0000644000126300012640000000077712127220006021031 0ustar00biocbuildphs_compbio#ifndef R_COLSUMMARIZE_H #define R_COLSUMMARIZE_H SEXP R_colSummarize_avg_log(SEXP RMatrix); SEXP R_colSummarize_log_avg(SEXP RMatrix); SEXP R_colSummarize_log_median(SEXP RMatrix); SEXP R_colSummarize_median_log(SEXP RMatrix); SEXP R_colSummarize_biweight_log(SEXP RMatrix); SEXP R_colSummarize_medianpolish_log(SEXP RMatrix); SEXP R_colSummarize_avg(SEXP RMatrix); SEXP R_colSummarize_median(SEXP RMatrix); SEXP R_colSummarize_biweight(SEXP RMatrix); SEXP R_colSummarize_medianpolish(SEXP RMatrix); #endif preprocessCore/src/R_plmd_interfaces.c0000644000126300012640000001200312127220006021472 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: R_plmr_interfaces.c ** ** Aim: Code which provides .Call() interfaces to the PLM-d code. ** ** Copyright (C) 2008 Ben Bolstad ** ** created by: B. M. 
Bolstad ** ** created on: Jan 30, 2008 ** ** History ** Jan 30, 2008 - Initial version ** Jan 15, 2009 - Fix issues with STRING_ELT/VECTOR_ELT ** ** *********************************************************************/ #include #include #include #include #include "plmd.h" #include "rlm_se.h" #include "psi_fns.h" /******************************************************************************* *** ** ** SEXP R_plmd_model(SEXP Y, SEXP PsiCode, SEXP transform) ** ** ** SEXP Y - A matrix with probes in rows and arrays in columns ** SEXP PsiCode - An integer code corresponding to the function that should be used to determine ** how outliers are down weighted. ** SEXP PsiK - a parameter for weighting algorithm. ** ** Returns ** parameter estimates. weights, residuals, Standard error estimates, whether split ** *********************************************************************/ SEXP R_plmd_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Groups, SEXP Ngroups){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_was_split; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; int *was_split; int *groups; double residSE; double *Ymat; double *X; /* Needed for SE */ int X_cols, X_rows; int rows; int cols; int ngroups; int howmany_split =0; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,5)); /* Don't allocate R_beta/R_SE straight away, we won't know how much space these will actually need until finishing the PLM-d fitting procedure. 
Instead we will just allocate those for which we currently know the size */ PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_was_split = allocVector(INTSXP,rows)); /* 0 - beta (added below) 1 - weights 2 - residuals 3 - standard errors (added below) 4 - R_was_split */ SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,4,R_was_split); UNPROTECT(3); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); was_split = INTEGER_POINTER(R_was_split); groups = INTEGER_POINTER(Groups); ngroups = INTEGER(Ngroups)[0]; Ymat = NUMERIC_POINTER(Y); beta = Calloc(cols + rows*ngroups -1, double); se = Calloc(cols + rows*ngroups -1, double); plmd_fit(Ymat, rows, cols, ngroups, groups, was_split, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20); for (i = 0; i < rows; i++){ howmany_split+=was_split[i]; } if (howmany_split > 0){ PROTECT(R_beta = allocVector(REALSXP,rows + cols + howmany_split*(ngroups-1))); PROTECT(R_SE = allocVector(REALSXP,rows + cols + howmany_split*(ngroups-1))); X = plmd_get_design_matrix(rows, cols, ngroups, groups,was_split,&X_rows,&X_cols); rlm_compute_se(X,Ymat, X_rows, X_cols, beta, residuals, weights, se,(double *)NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); Free(X); for (i = cols; i < rows + cols + howmany_split*(ngroups-1) - 1; i++) beta[rows + cols + howmany_split*(ngroups-1) -1]-=beta[i]; for (i = 0; i < rows + cols + howmany_split*(ngroups-1) ; i++){ NUMERIC_POINTER(R_beta)[i] = beta[i]; NUMERIC_POINTER(R_SE)[i] = se[i]; } } else { /* Note use 2 rather than 4 for SE method */ rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se, (double *) NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); beta[rows+cols -1] = 0.0; se[rows+cols -1] = 0.0; for (i = cols; i < rows + cols -1; i++) beta[rows+cols -1]-=beta[i]; PROTECT(R_beta = 
allocVector(REALSXP,rows+cols)); PROTECT(R_SE = allocVector(REALSXP,rows+cols)); for (i = 0; i < rows + cols; i++){ NUMERIC_POINTER(R_beta)[i] = beta[i]; NUMERIC_POINTER(R_SE)[i] = se[i]; } } Free(beta); Free(se); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,3,R_SE); UNPROTECT(2); PROTECT(R_return_value_names= allocVector(STRSXP,5)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); SET_STRING_ELT(R_return_value_names,4,mkChar("WasSplit")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } preprocessCore/src/R_plmr_interfaces.c0000644000126300012640000003370612127220006021525 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: R_plmr_interfaces.c ** ** Aim: Code which provides .Call() interfaces to the PLM-r code. ** ** Copyright (C) 2008 Ben Bolstad ** ** created by: B. M. Bolstad ** ** created on: Jan 28, 2008 ** ** History ** Jan 28, 2008 - Initial version ** Jan 15, 2009 - fix issues with VECTOR_ELT/SET_VECTOR_ELT ** ** *********************************************************************/ #include #include #include #include #include "plmr.h" #include "rlm_se.h" #include "psi_fns.h" #include "R_plmr_interfaces.h" /********************************************************************************** ** ** SEXP R_plmr_model(SEXP Y, SEXP PsiCode, SEXP transform) ** ** ** SEXP Y - A matrix with probes in rows and arrays in columns ** SEXP PsiCode - An integer code corresponding to the function that should be used to determine ** how outliers are down weighted. ** SEXP PsiK - a parameter for weighting algorithm. ** ** Returns ** parameter estimates. 
weights, residuals, Standard error estimates ** *********************************************************************/ SEXP R_plmr_model(SEXP Y, SEXP PsiCode, SEXP PsiK){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double residSE; double *Ymat; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, rows + cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,rows+cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); UNPROTECT(4); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); Ymat = NUMERIC_POINTER(Y); plmr_fit(Ymat, rows, cols, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); /* Note use 2 rather than 4 for SE method */ rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se, (double *)NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); beta[rows+cols -1] = 0.0; se[rows+cols -1] = 0.0; for (i = cols; i < rows + cols -1; i++) beta[rows+cols -1]-=beta[i]; PROTECT(R_return_value_names= allocVector(STRSXP,4)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } 
/********************************************************************************** ** ** SEXP R_plmrr_model(SEXP Y, SEXP PsiCode, SEXP transform) ** ** ** SEXP Y - A matrix with probes in rows and arrays in columns ** SEXP PsiCode - An integer code corresponding to the function that should be used to determine ** how outliers are down weighted. ** SEXP PsiK - a parameter for weighting algorithm. ** ** Returns ** parameter estimates. weights, residuals, Standard error estimates ** *********************************************************************/ SEXP R_plmrr_model(SEXP Y, SEXP PsiCode, SEXP PsiK){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double residSE; double *Ymat; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, rows + cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,rows+cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); UNPROTECT(4); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); Ymat = NUMERIC_POINTER(Y); plmrr_fit(Ymat, rows, cols, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); /* Note use 2 rather than 4 for SE method */ rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se, (double *)NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); beta[rows+cols -1] = 0.0; se[rows+cols -1] = 0.0; for (i = cols; i < rows + cols -1; i++) beta[rows+cols -1]-=beta[i]; PROTECT(R_return_value_names= 
allocVector(STRSXP,4)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } /********************************************************************************** ** ** SEXP R_plmrc_model(SEXP Y, SEXP PsiCode, SEXP transform) ** ** ** SEXP Y - A matrix with probes in rows and arrays in columns ** SEXP PsiCode - An integer code corresponding to the function that should be used to determine ** how outliers are down weighted. ** SEXP PsiK - a parameter for weighting algorithm. ** ** Returns ** parameter estimates. weights, residuals, Standard error estimates ** *********************************************************************/ SEXP R_plmrc_model(SEXP Y, SEXP PsiCode, SEXP PsiK){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double residSE; double *Ymat; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, rows + cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,rows+cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); UNPROTECT(4); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); Ymat = NUMERIC_POINTER(Y); plmrc_fit(Ymat, rows, cols, beta, residuals, weights, 
PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); /* Note use 2 rather than 4 for SE method */ rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se, (double *)NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); beta[rows+cols -1] = 0.0; se[rows+cols -1] = 0.0; for (i = cols; i < rows + cols -1; i++) beta[rows+cols -1]-=beta[i]; PROTECT(R_return_value_names= allocVector(STRSXP,4)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } SEXP R_wplmr_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double residSE; double *Ymat; double *w; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, rows + cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,rows+cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); UNPROTECT(4); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); Ymat = NUMERIC_POINTER(Y); w = NUMERIC_POINTER(Weights); plmr_wfit(Ymat, rows, cols, w, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); /* Note use 2 is 4 for std PLM */ rlm_compute_se_anova(Ymat, rows, cols, beta, 
residuals, weights,se, (double *)NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); beta[rows+cols -1] = 0.0; se[rows+cols -1] = 0.0; for (i = cols; i < rows + cols -1; i++) beta[rows+cols -1]-=beta[i]; PROTECT(R_return_value_names= allocVector(STRSXP,4)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } SEXP R_wplmrr_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double residSE; double *Ymat; double *w; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, rows + cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,rows+cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); UNPROTECT(4); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); Ymat = NUMERIC_POINTER(Y); w = NUMERIC_POINTER(Weights); plmrr_wfit(Ymat, rows, cols, w, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); /* Note use 2 is 4 for std PLM */ rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se, (double *)NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); beta[rows+cols -1] = 0.0; se[rows+cols -1] = 0.0; 
for (i = cols; i < rows + cols -1; i++) beta[rows+cols -1]-=beta[i]; PROTECT(R_return_value_names= allocVector(STRSXP,4)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } SEXP R_wplmrc_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double residSE; double *Ymat; double *w; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, rows + cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,rows+cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); UNPROTECT(4); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); Ymat = NUMERIC_POINTER(Y); w = NUMERIC_POINTER(Weights); plmrc_wfit(Ymat, rows, cols, w, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); /* Note use 2 is 4 for std PLM */ rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se, (double *)NULL, &residSE, 2, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); beta[rows+cols -1] = 0.0; se[rows+cols -1] = 0.0; for (i = cols; i < rows + cols -1; i++) beta[rows+cols -1]-=beta[i]; PROTECT(R_return_value_names= allocVector(STRSXP,4)); 
SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } preprocessCore/src/R_plmr_interfaces.h0000644000126300012640000000065612127220006021530 0ustar00biocbuildphs_compbio#ifndef R_PLMR_INTERFACES_H #define R_PLMR_INTERFACES_H SEXP R_plmr_model(SEXP Y, SEXP PsiCode, SEXP PsiK); SEXP R_wplmr_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights); SEXP R_plmrr_model(SEXP Y, SEXP PsiCode, SEXP PsiK); SEXP R_plmrc_model(SEXP Y, SEXP PsiCode, SEXP PsiK); SEXP R_wplmrr_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights); SEXP R_wplmrc_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights); #endif preprocessCore/src/R_rlm_interfaces.c0000644000126300012640000003234012127220006021336 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: R_rlm_interfaces.c ** ** Aim: Code which provides .Call() interfaces to the rlm code. ** ** Copyright (C) 2006-2007 Ben Bolstad ** ** created by: B. M. 
Bolstad ** ** created on: Aug 16, 2006 ** ** History ** Aug 16, 2006 Initial version ** Nov 1, 2006 - add SE to output of function ** Sep 13, 2007 - Make the value of the constrained parameters something more sensible ** Sep 14, 2007 - Add medianpolish code interface (yes it is not really an rlm method, ** but it is analogous enough in the format presented here) ** Jan 15, 2009 - fix STRING_ELT/VECTOR_ELT issues ** Apr 23, 2009 - R_rlm_rma_default_model now returns scale estimate ** Apr 28, 2009 - R_wrlm_rma_default_model now returns scale estimate ** Aug 22, 2009 - fix issue with input scales ** *********************************************************************/ #include #include #include #include #include "rlm.h" #include "rlm_se.h" #include "psi_fns.h" #include "medianpolish.h" #include "R_rlm_interfaces.h" /********************************************************************************** ** ** SEXP R_rlm_rma_default_model(SEXP Y, SEXP PsiCode, SEXP transform) ** ** ** SEXP Y - A matrix with probes in rows and arrays in columns ** SEXP PsiCode - An integer code corresponding to the function that should be used to determine ** how outliers are down weighted. ** SEXP PsiK - a parameter for weighting algorithm. ** ** Returns ** parameter estimates. 
/**********************************************************************************
 **
 ** SEXP R_rlm_rma_default_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Scales)
 **
 ** SEXP Y - A matrix with probes in rows and arrays in columns
 ** SEXP PsiCode - An integer code corresponding to the function that should be used
 **                to determine how outliers are down weighted.
 ** SEXP PsiK - a parameter for weighting algorithm.
 ** SEXP Scales - NULL (estimate the scale internally) or a numeric vector whose
 **               first element supplies the input scale for the fit.
 **
 ** Returns parameter estimates, weights, residuals, standard error estimates
 ** and the scale estimate used/computed.
 **
 *********************************************************************/

SEXP R_rlm_rma_default_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Scales){

  SEXP R_return_value;
  SEXP R_weights;
  SEXP R_residuals;
  SEXP R_beta;
  SEXP R_SE;
  SEXP R_scale;
  SEXP R_return_value_names;
  SEXP dim1;

  double *beta;
  double *residuals;
  double *weights;
  double *se;
  double *scaleptr;
  double residSE;
  double *Ymat;

  int rows;
  int cols;
  int i;

  /* dimensions of the data matrix */
  PROTECT(dim1 = getAttrib(Y,R_DimSymbol));
  rows = INTEGER(dim1)[0];
  cols = INTEGER(dim1)[1];
  UNPROTECT(1);

  PROTECT(R_return_value = allocVector(VECSXP,5));
  PROTECT(R_beta = allocVector(REALSXP, rows + cols));
  PROTECT(R_weights = allocMatrix(REALSXP,rows,cols));
  PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols));
  PROTECT(R_SE = allocVector(REALSXP,rows+cols));
  PROTECT(R_scale = allocVector(REALSXP,1));

  SET_VECTOR_ELT(R_return_value,0,R_beta);
  SET_VECTOR_ELT(R_return_value,1,R_weights);
  SET_VECTOR_ELT(R_return_value,2,R_residuals);
  SET_VECTOR_ELT(R_return_value,3,R_SE);
  SET_VECTOR_ELT(R_return_value,4,R_scale);

  UNPROTECT(5);

  beta = NUMERIC_POINTER(R_beta);
  residuals = NUMERIC_POINTER(R_residuals);
  weights = NUMERIC_POINTER(R_weights);
  se = NUMERIC_POINTER(R_SE);
  scaleptr = NUMERIC_POINTER(R_scale);

  /* A negative input scale tells the fitting routine to estimate the scale
  ** itself.
  ** BUG FIX: the previous `if (isNull) ... else if (length(Scales) != cols)`
  ** left scaleptr[0] UNINITIALIZED when a non-NULL Scales of length == cols
  ** was supplied; any non-NULL Scales now contributes its first element. */
  if (isNull(Scales)){
    scaleptr[0] = -1.0;
  } else {
    scaleptr[0] = NUMERIC_POINTER(Scales)[0];
  }

  Ymat = NUMERIC_POINTER(Y);

  rlm_fit_anova_scale(Ymat, rows, cols, scaleptr, beta, residuals, weights,
                      PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0);

  rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se,
                       (double *)NULL, &residSE, 4,
                       PsiFunc(asInteger(PsiCode)),asReal(PsiK));

  /* probe effects are constrained to sum to zero: reconstruct the last one
  ** (its SE is returned as computed by rlm_compute_se_anova, method 4) */
  beta[rows+cols -1] = 0.0;

  for (i = cols; i < rows + cols -1; i++)
    beta[rows+cols -1]-=beta[i];

  PROTECT(R_return_value_names= allocVector(STRSXP,5));
  SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates"));
  SET_STRING_ELT(R_return_value_names,1,mkChar("Weights"));
  SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals"));
  SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors"));
  SET_STRING_ELT(R_return_value_names,4,mkChar("Scale"));
  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
  UNPROTECT(2);

  return R_return_value;
}

/**********************************************************************************
 **
 ** SEXP R_wrlm_rma_default_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights,
 **                               SEXP Scales)
 **
 ** Weighted variant of R_rlm_rma_default_model: `Weights` supplies fixed input
 ** weights (one per matrix cell) to the robust fit. Same return value layout.
 **
 *********************************************************************/

SEXP R_wrlm_rma_default_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights, SEXP Scales){

  SEXP R_return_value;
  SEXP R_weights;
  SEXP R_residuals;
  SEXP R_beta;
  SEXP R_SE;
  SEXP R_scale;
  SEXP R_return_value_names;
  SEXP dim1;

  double *beta;
  double *residuals;
  double *weights;
  double *se;
  double *scaleptr;
  double residSE;
  double *Ymat;
  double *w;

  int rows;
  int cols;
  int i;

  /* dimensions of the data matrix */
  PROTECT(dim1 = getAttrib(Y,R_DimSymbol));
  rows = INTEGER(dim1)[0];
  cols = INTEGER(dim1)[1];
  UNPROTECT(1);

  PROTECT(R_return_value = allocVector(VECSXP,5));
  PROTECT(R_beta = allocVector(REALSXP, rows + cols));
  PROTECT(R_weights = allocMatrix(REALSXP,rows,cols));
  PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols));
  PROTECT(R_SE = allocVector(REALSXP,rows+cols));
  PROTECT(R_scale = allocVector(REALSXP,1));

  SET_VECTOR_ELT(R_return_value,0,R_beta);
  SET_VECTOR_ELT(R_return_value,1,R_weights);
  SET_VECTOR_ELT(R_return_value,2,R_residuals);
  SET_VECTOR_ELT(R_return_value,3,R_SE);
  SET_VECTOR_ELT(R_return_value,4,R_scale);

  UNPROTECT(5);

  beta = NUMERIC_POINTER(R_beta);
  residuals = NUMERIC_POINTER(R_residuals);
  weights = NUMERIC_POINTER(R_weights);
  se = NUMERIC_POINTER(R_SE);
  scaleptr = NUMERIC_POINTER(R_scale);

  /* BUG FIX: same uninitialized-scale case as in R_rlm_rma_default_model;
  ** any non-NULL Scales now supplies its first element as the input scale. */
  if (isNull(Scales)){
    scaleptr[0] = -1.0;
  } else {
    scaleptr[0] = NUMERIC_POINTER(Scales)[0];
  }

  Ymat = NUMERIC_POINTER(Y);
  w = NUMERIC_POINTER(Weights);

  rlm_wfit_anova_scale(Ymat, rows, cols, scaleptr, w, beta, residuals, weights,
                       PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0);

  rlm_compute_se_anova(Ymat, rows, cols, beta, residuals, weights,se,
                       (double *)NULL, &residSE, 4,
                       PsiFunc(asInteger(PsiCode)),asReal(PsiK));

  /* probe effects are constrained to sum to zero: reconstruct the last one */
  beta[rows+cols -1] = 0.0;
  se[rows+cols -1] = 0.0;

  for (i = cols; i < rows + cols -1; i++)
    beta[rows+cols -1]-=beta[i];

  PROTECT(R_return_value_names= allocVector(STRSXP,5));
  SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates"));
  SET_STRING_ELT(R_return_value_names,1,mkChar("Weights"));
  SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals"));
  SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors"));
  SET_STRING_ELT(R_return_value_names,4,mkChar("Scale"));
  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
  UNPROTECT(2);

  return R_return_value;
}
SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } SEXP R_rlm_rma_given_probe_effects(SEXP Y, SEXP probe_effects, SEXP PsiCode, SEXP PsiK, SEXP Scales){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_scale; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double *scaleptr; double *probeeffects; double residSE; double *Ymat; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,5)); PROTECT(R_beta = allocVector(REALSXP, cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,cols)); PROTECT(R_scale = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); SET_VECTOR_ELT(R_return_value,4,R_scale); UNPROTECT(5); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); scaleptr = NUMERIC_POINTER(R_scale); if (isNull(Scales)){ for (i =0; i < cols; i++){ scaleptr[i] = -1.0; } } else if (length(Scales) != cols) { for (i =0; i < cols; i++){ scaleptr[i] = NUMERIC_POINTER(Scales)[0]; } } else if (length(Scales) == cols){ for (i =0; i < cols; i++){ scaleptr[i] = NUMERIC_POINTER(Scales)[i]; } } probeeffects = NUMERIC_POINTER(probe_effects); Ymat = NUMERIC_POINTER(Y); rlm_fit_anova_given_probe_effects_scale(Ymat, rows, cols, scaleptr, probeeffects, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); rlm_compute_se_anova_given_probe_effects(Ymat, rows, cols, 
probeeffects, beta, residuals, weights,se, (double *)NULL, &residSE, 4, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); PROTECT(R_return_value_names= allocVector(STRSXP,5)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); SET_STRING_ELT(R_return_value_names,4,mkChar("Scale")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } SEXP R_wrlm_rma_given_probe_effects(SEXP Y, SEXP probe_effects, SEXP PsiCode, SEXP PsiK, SEXP Weights, SEXP Scales){ SEXP R_return_value; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_scale; SEXP R_return_value_names; SEXP dim1; double *beta; double *residuals; double *weights; double *se; double *scaleptr; double *w; double *probeeffects; double residSE; double *Ymat; int rows; int cols; int i; PROTECT(dim1 = getAttrib(Y,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,5)); PROTECT(R_beta = allocVector(REALSXP, cols)); PROTECT(R_weights = allocMatrix(REALSXP,rows,cols)); PROTECT(R_residuals = allocMatrix(REALSXP,rows,cols)); PROTECT(R_SE = allocVector(REALSXP,cols)); PROTECT(R_scale = allocVector(REALSXP,cols)); SET_VECTOR_ELT(R_return_value,0,R_beta); SET_VECTOR_ELT(R_return_value,1,R_weights); SET_VECTOR_ELT(R_return_value,2,R_residuals); SET_VECTOR_ELT(R_return_value,3,R_SE); SET_VECTOR_ELT(R_return_value,4,R_scale); UNPROTECT(5); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); probeeffects = NUMERIC_POINTER(probe_effects); scaleptr = NUMERIC_POINTER(R_scale); if (isNull(Scales)){ for (i =0; i < cols; i++){ scaleptr[i] = -1.0; } } else if (length(Scales) != cols) { for (i =0; i < cols; i++){ scaleptr[i] = 
NUMERIC_POINTER(Scales)[0]; } } else if (length(Scales) == cols){ for (i =0; i < cols; i++){ scaleptr[i] = NUMERIC_POINTER(Scales)[i]; } } Ymat = NUMERIC_POINTER(Y); w = NUMERIC_POINTER(Weights); rlm_wfit_anova_given_probe_effects_scale(Ymat, rows, cols, scaleptr, probeeffects, w, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0); rlm_compute_se_anova_given_probe_effects(Ymat, rows, cols, probeeffects, beta, residuals, weights,se, (double *)NULL, &residSE, 4, PsiFunc(asInteger(PsiCode)),asReal(PsiK)); PROTECT(R_return_value_names= allocVector(STRSXP,5)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); SET_STRING_ELT(R_return_value_names,4,mkChar("Scale")); setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); UNPROTECT(2); return R_return_value; } preprocessCore/src/R_rlm_interfaces.h0000644000126300012640000000076112127220006021345 0ustar00biocbuildphs_compbio#ifndef R_RLM_INTERFACES_H #define R_RLM_INTERFACES_H SEXP R_rlm_rma_default_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Scales); SEXP R_wrlm_rma_default_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Weights, SEXP Scales); SEXP R_medianpolish_rma_default_model(SEXP Y); SEXP R_rlm_rma_given_probe_effects(SEXP Y, SEXP probe_effects, SEXP PsiCode, SEXP PsiK, SEXP Scales); SEXP R_wrlm_rma_given_probe_effects(SEXP Y, SEXP probe_effects, SEXP PsiCode, SEXP PsiK, SEXP Weights, SEXP Scales); #endif preprocessCore/src/R_subColSummarize.c0000644000126300012640000014151512127220006021472 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: R_subColSummarize.c ** ** Aim: Code which provides .Call() interfaces to the subcolumn ** summarization code. ** ** Copyright (C) 2007 Ben Bolstad ** ** created by: B. M. 
Bolstad ** ** created on: Sep 15, 2007 ** ** History ** Sep 18, 2007 - Initial version ** Mar 24, 2008 - Add multi-threaded implementation based on pthreads ** for each R_subColSummarize_* ** ** Dec 1, 2010 - change how PTHREAD_STACK_MIN is used ** *********************************************************************/ #include #include #include #include #include "avg_log.h" #include "log_avg.h" #include "avg.h" #include "biweight.h" #include "median_log.h" #include "log_median.h" #include "median.h" #include "medianpolish.h" #ifdef USE_PTHREADS #include #include #include #define THREADS_ENV_VAR "R_THREADS" pthread_mutex_t mutex_R; struct loop_data{ double *matrix; double *results; SEXP *R_rowIndexList; int rows; int cols; int length_rowIndexList; int start_row; int end_row; }; #endif #ifdef USE_PTHREADS static void *subColSummarize_avg_log_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); AverageLog_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_avg_log(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; 
#else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols)); results = NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? 
length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_avg_log_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); AverageLog_noSE(matrix, rows, cols, cur_rows, buffer, ncur_rows); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_log_avg_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = 
Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); LogAverage_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_log_avg(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols)); results = NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be 
aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_log_avg_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); LogAverage_noSE(matrix, rows, cols, 
cur_rows, buffer, ncur_rows); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_avg_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); ColAverage_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_avg(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols)); results = NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread 
detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_avg_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", 
i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); ColAverage_noSE(matrix, rows, cols, cur_rows, buffer, ncur_rows); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_biweight_log_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); TukeyBiweight_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_biweight_log(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols)); results = 
NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? 
length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_biweight_log_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); TukeyBiweight_noSE(matrix, rows, cols, cur_rows, buffer, ncur_rows); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_biweight_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = 
Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); TukeyBiweight_no_log_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_biweight(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols)); results = NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special 
cases to be aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_biweight_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); 
TukeyBiweight_no_log_noSE(matrix, rows, cols, cur_rows, buffer, ncur_rows); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_median_log_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); MedianLog_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_median_log(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols)); results = NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) 
Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_median_log_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code 
from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); MedianLog_noSE(matrix, rows, cols, cur_rows, buffer, ncur_rows); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_log_median_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); LogMedian_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_log_median(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = 
allocMatrix(REALSXP,length_rowIndexList,cols)); results = NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? 
length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_log_median_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); LogMedian_noSE(matrix, rows, cols, cur_rows, buffer, ncur_rows); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_median_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer; int i, j; int ncur_rows; buffer = 
Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); ColMedian_noSE(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); } #endif SEXP R_subColSummarize_median(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols)); results = NUMERIC_POINTER(R_summaries); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be 
aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_median_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); ColMedian_noSE(matrix, rows, cols, 
cur_rows, buffer, ncur_rows);
    for (i = 0; i < cols; i++){
      results[i*length_rowIndexList + j] = buffer[i];
    }
  }
  Free(buffer);
#endif
  UNPROTECT(1);
  return R_summaries;
}

#ifdef USE_PTHREADS
/* Thread worker for R_subColSummarize_medianpolish_log.
 *
 * Summarizes the probesets (entries start_row..end_row of the row-index
 * list) assigned to this thread using MedianPolish, writing one row of
 * the shared results matrix per probeset.  Writes to args->results are
 * serialized on mutex_R.
 *
 * NOTE(review): LENGTH/VECTOR_ELT on the index list are called here
 * without holding mutex_R -- presumably read-only access to the already
 * PROTECTed list is considered safe from workers; confirm against
 * R's threading constraints. */
static void *subColSummarize_medianpolish_log_group(void *data){
  struct loop_data *args = (struct loop_data *) data;
  int *cur_rows;
  double *buffer, *buffer2;   /* per-thread scratch: summaries and SEs */
  int i, j;
  int ncur_rows;

  buffer = Calloc(args->cols,double);
  buffer2 = Calloc(args->cols,double);

  for (j = args->start_row; j <= args->end_row; j++){
    ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j));
    cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j));
    MedianPolish(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows, buffer2);
    /* results is column-major: element [j, i] of the summary matrix */
    pthread_mutex_lock (&mutex_R);
    for (i = 0; i < args->cols; i++){
      args->results[i*args->length_rowIndexList + j] = buffer[i];
    }
    pthread_mutex_unlock (&mutex_R);
  }
  Free(buffer);
  Free(buffer2);
  /* NOTE(review): control falls off the end of this void* function;
   * pthread_join stores the (indeterminate) value.  A `return NULL;`
   * would be cleaner -- confirm before changing. */
}
#endif

/* .Call() entry point: for each element of R_rowIndexList (a list of
 * integer row-index vectors, one per probeset), summarize the selected
 * rows of RMatrix column-by-column with median polish on the log2 scale.
 *
 * RMatrix        -- numeric matrix (rows x cols)
 * R_rowIndexList -- list of INTSXP vectors of 0-based row indices
 * returns        -- REALSXP matrix, length(R_rowIndexList) x cols
 *
 * Multi-threaded when built with USE_PTHREADS; thread count is read
 * from the environment variable named by THREADS_ENV_VAR ("R_THREADS"). */
SEXP R_subColSummarize_medianpolish_log(SEXP RMatrix, SEXP R_rowIndexList){
  SEXP R_summaries;
  SEXP dim1;
  double *matrix=NUMERIC_POINTER(RMatrix);
  double *results, *buffer, *buffer2;
  int *cur_rows;
  int rows, cols;
  int length_rowIndexList = LENGTH(R_rowIndexList);
  int ncur_rows;
  int i,j;
#ifdef USE_PTHREADS
  int t, returnCode, chunk_size, num_threads = 1;
  double chunk_size_d, chunk_tot_d;
  char *nthreads;
  pthread_attr_t attr;
  pthread_t *threads;
  struct loop_data *args;
  void *status;
#ifdef PTHREAD_STACK_MIN
  size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
#else
  size_t stacksize = 0x8000;
#endif
#endif

  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
  rows = INTEGER(dim1)[0];
  cols = INTEGER(dim1)[1];
  UNPROTECT(1);

  PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols));
  results = NUMERIC_POINTER(R_summaries);
  /* NOTE(review): unlike the sibling summarizers, these scratch buffers
   * are allocated before the USE_PTHREADS split, yet only the serial
   * (#else) branch below frees them -- the threaded path leaks them. */
  buffer = Calloc(cols,double);
  buffer2 = Calloc(cols,double);
#ifdef USE_PTHREADS
  /* Thread count comes from the R_THREADS environment variable. */
  nthreads = getenv(THREADS_ENV_VAR);
  if(nthreads != NULL){
    num_threads = atoi(nthreads);
    if(num_threads <= 0){
      error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads);
    }
  }
  threads = (pthread_t *) Calloc(num_threads, pthread_t);

  /* Initialize and set thread detached attribute */
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
  pthread_attr_setstacksize (&attr, stacksize);

  /* this code works out how many threads to use and allocates ranges of subColumns to each thread */
  /* The aim is to try to be as fair as possible in dividing up the matrix */
  /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */
  if (num_threads < length_rowIndexList){
    chunk_size = length_rowIndexList/num_threads;
    chunk_size_d = ((double) length_rowIndexList)/((double) num_threads);
  } else {
    chunk_size = 1;
    chunk_size_d = 1;
  }
  if(chunk_size == 0){
    chunk_size = 1;
  }
  args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data);
  args[0].matrix = matrix;
  args[0].results = results;
  args[0].R_rowIndexList = &R_rowIndexList;
  args[0].rows = rows;
  args[0].cols = cols;
  args[0].length_rowIndexList = length_rowIndexList;

  pthread_mutex_init(&mutex_R, NULL);

  t = 0; /* t = number of actual threads doing work */
  chunk_tot_d = 0;
  for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){
    if(t != 0){
      memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data));
    }
    args[t].start_row = i;
    /* take care of distribution of the remainder (when #chips%#threads != 0) */
    chunk_tot_d += chunk_size_d;
    // Add 0.00001 in case there was a rounding issue with the division
    if(i+chunk_size < floor(chunk_tot_d+0.00001)){
      args[t].end_row = i+chunk_size;
      i++;
    }
    else{
      args[t].end_row = i+chunk_size-1;
    }
    t++;
  }

  for (i =0; i < t; i++){
    returnCode = pthread_create(&threads[i], &attr, subColSummarize_medianpolish_log_group, (void *) &(args[i]));
    if (returnCode){
      error("ERROR; return code from pthread_create() is %d\n", returnCode);
    }
  }
  /* Wait for the other threads */
  for(i = 0; i
< t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); MedianPolish(matrix, rows, cols, cur_rows, buffer, ncur_rows, buffer2); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer2); Free(buffer); #endif UNPROTECT(1); return R_summaries; } #ifdef USE_PTHREADS static void *subColSummarize_medianpolish_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer, *buffer2; int i, j; int ncur_rows; buffer = Calloc(args->cols,double); buffer2 = Calloc(args->cols,double); for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); MedianPolish_no_log(args->matrix, args->rows, args->cols, cur_rows, buffer, ncur_rows, buffer2); pthread_mutex_lock (&mutex_R); for (i = 0; i < args->cols; i++){ args->results[i*args->length_rowIndexList + j] = buffer[i]; } pthread_mutex_unlock (&mutex_R); } Free(buffer); Free(buffer2); } #endif SEXP R_subColSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_summaries; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer, *buffer2; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t 
stacksize = 0x8000;
#endif
#endif

  /* Read matrix dimensions from the dim attribute. */
  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
  rows = INTEGER(dim1)[0];
  cols = INTEGER(dim1)[1];
  UNPROTECT(1);

  /* Output: one row per probeset, one column per array/chip. */
  PROTECT(R_summaries = allocMatrix(REALSXP,length_rowIndexList,cols));
  results = NUMERIC_POINTER(R_summaries);
#ifdef USE_PTHREADS
  /* Thread count comes from the R_THREADS environment variable. */
  nthreads = getenv(THREADS_ENV_VAR);
  if(nthreads != NULL){
    num_threads = atoi(nthreads);
    if(num_threads <= 0){
      error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads);
    }
  }
  threads = (pthread_t *) Calloc(num_threads, pthread_t);

  /* Initialize and set thread detached attribute */
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
  pthread_attr_setstacksize (&attr, stacksize);

  /* this code works out how many threads to use and allocates ranges of subColumns to each thread */
  /* The aim is to try to be as fair as possible in dividing up the matrix */
  /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */
  if (num_threads < length_rowIndexList){
    chunk_size = length_rowIndexList/num_threads;
    chunk_size_d = ((double) length_rowIndexList)/((double) num_threads);
  } else {
    chunk_size = 1;
    chunk_size_d = 1;
  }
  if(chunk_size == 0){
    chunk_size = 1;
  }
  /* One loop_data slot per worker that will actually be launched. */
  args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ?
length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].results = results; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, subColSummarize_median_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else buffer = Calloc(cols,double); buffer2 = Calloc(cols,double); for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); MedianPolish_no_log(matrix, rows, cols, cur_rows, buffer, ncur_rows, buffer2); for (i = 0; i < cols; i++){ results[i*length_rowIndexList + j] = buffer[i]; } } Free(buffer2); Free(buffer); #endif UNPROTECT(1); return R_summaries; } preprocessCore/src/R_subColSummarize.h0000644000126300012640000000136512127220006021475 0ustar00biocbuildphs_compbio#ifndef R_SUBCOLSUMMARIZE_H #define 
R_SUBCOLSUMMARIZE_H

/* .Call() entry points for sub-column summarization.  Each takes a
 * numeric matrix and a list of integer row-index vectors (one vector
 * per probeset) and returns a length(rowIndexList) x ncol(matrix)
 * matrix of per-column summaries.  The _log variants summarize on the
 * log2 scale; log_avg / log_median transform after summarizing. */
SEXP R_subColSummarize_avg_log(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_log_avg(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_avg(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_biweight_log(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_biweight(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_median_log(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_log_median(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_median(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_medianpolish_log(SEXP RMatrix, SEXP R_rowIndexList);
SEXP R_subColSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList);
#endif
preprocessCore/src/R_subrcModel_interfaces.c0000644000126300012640000004432612127220006022652 0ustar00biocbuildphs_compbio/*********************************************************************
 **
 ** file: R_subrcModel_interface.c
 **
 ** Aim: Code which provides .Call() interfaces to the subset of rows in
 ** a matrix rcModel fitting
 **
 ** Copyright (C) 2012 Ben Bolstad
 **
 ** created by: B. M.
Bolstad ** ** created on: Mar 7, 2012 ** ** History ** Mar 7, 2012 - Initial version ** *********************************************************************/ #include #include #include #include #include "rlm.h" #include "rlm_se.h" #include "psi_fns.h" #include "medianpolish.h" #ifdef USE_PTHREADS #include #include #include #define THREADS_ENV_VAR "R_THREADS" pthread_mutex_t mutex_R; struct loop_data{ double *matrix; SEXP *R_return_value; SEXP *R_rowIndexList; SEXP *PsiCode; SEXP *PsiK; SEXP *Scales; int rows; int cols; int length_rowIndexList; int start_row; int end_row; }; #endif #ifdef USE_PTHREADS static void *sub_rcModelSummarize_medianpolish_group(void *data){ struct loop_data *args = (struct loop_data *) data; int *cur_rows; double *buffer, *buffer2; int i, j, k; int ncur_rows; SEXP R_return_value_cur; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; double *beta; double *residuals; double *weights; double *se; double intercept; int cols = args->cols; for (j = args->start_row; j <= args->end_row; j++){ ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j)); pthread_mutex_lock(&mutex_R); PROTECT(R_return_value_cur = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols)); /* PROTECT(R_weights = allocMatrix(REALSXP,ncur_rows,cols));*/ PROTECT(R_residuals = allocMatrix(REALSXP,ncur_rows,cols)); /* PROTECT(R_SE = allocVector(REALSXP,ncur_rows+cols)); */ R_weights = R_NilValue; R_SE = R_NilValue; beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); /* weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); */ SET_VECTOR_ELT(R_return_value_cur,0,R_beta); SET_VECTOR_ELT(R_return_value_cur,1,R_weights); SET_VECTOR_ELT(R_return_value_cur,2,R_residuals); SET_VECTOR_ELT(R_return_value_cur,3,R_SE); UNPROTECT(2); PROTECT(R_return_value_names= allocVector(STRSXP,4)); 
SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value_cur, R_NamesSymbol,R_return_value_names); UNPROTECT(1); SET_VECTOR_ELT(*(args->R_return_value),j,R_return_value_cur); UNPROTECT(1); pthread_mutex_unlock(&mutex_R); for (k = 0; k < cols; k++){ for (i =0; i < ncur_rows; i++){ residuals[k*ncur_rows + i] = args->matrix[k*args->rows + cur_rows[i]]; } } memset(beta, 0, (ncur_rows+cols)*sizeof(double)); median_polish_fit_no_copy(residuals, ncur_rows, cols, &beta[cols], &beta[0], &intercept); for (i=0; i < cols; i++) beta[i]+=intercept; } } #endif SEXP R_sub_rcModelSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList){ SEXP R_return_value; SEXP dim1; double *matrix=NUMERIC_POINTER(RMatrix); double *results, *buffer, *buffer2; int *cur_rows; int rows, cols; int length_rowIndexList = LENGTH(R_rowIndexList); int ncur_rows; int i,j; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #else SEXP R_return_value_cur; SEXP R_weights; SEXP R_residuals; SEXP R_beta; SEXP R_SE; SEXP R_return_value_names; double *beta; double *residuals; double *weights; double *se; double intercept; int k; #endif PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); PROTECT(R_return_value = allocVector(VECSXP,length_rowIndexList)); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", 
THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of subColumns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of subColumns is less than the number of threads */ if (num_threads < length_rowIndexList){ chunk_size = length_rowIndexList/num_threads; chunk_size_d = ((double) length_rowIndexList)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data); args[0].matrix = matrix; args[0].R_return_value = &R_return_value; args[0].R_rowIndexList = &R_rowIndexList; args[0].rows = rows; args[0].cols = cols; args[0].length_rowIndexList = length_rowIndexList; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_row = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_row = i+chunk_size; i++; } else{ args[t].end_row = i+chunk_size-1; } t++; } for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, sub_rcModelSummarize_medianpolish_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = 
pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else for (j =0; j < length_rowIndexList; j++){ ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j)); PROTECT(R_return_value_cur = allocVector(VECSXP,4)); PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols)); /* PROTECT(R_weights = allocMatrix(REALSXP,ncur_rows,cols));*/ PROTECT(R_residuals = allocMatrix(REALSXP,ncur_rows,cols)); /* PROTECT(R_SE = allocVector(REALSXP,ncur_rows+cols)); */ R_weights = R_NilValue; R_SE = R_NilValue; SET_VECTOR_ELT(R_return_value_cur,0,R_beta); SET_VECTOR_ELT(R_return_value_cur,1,R_weights); SET_VECTOR_ELT(R_return_value_cur,2,R_residuals); SET_VECTOR_ELT(R_return_value_cur,3,R_SE); UNPROTECT(2); beta = NUMERIC_POINTER(R_beta); residuals = NUMERIC_POINTER(R_residuals); /* weights = NUMERIC_POINTER(R_weights); se = NUMERIC_POINTER(R_SE); */ for (k = 0; k < cols; k++){ for (i =0; i < ncur_rows; i++){ residuals[k*ncur_rows + i] = matrix[k*rows + cur_rows[i]]; } } memset(beta, 0, (ncur_rows+cols)*sizeof(double)); median_polish_fit_no_copy(residuals, ncur_rows, cols, &beta[cols], &beta[0], &intercept); for (i=0; i < cols; i++) beta[i]+=intercept; PROTECT(R_return_value_names= allocVector(STRSXP,4)); SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates")); SET_STRING_ELT(R_return_value_names,1,mkChar("Weights")); SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals")); SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors")); setAttrib(R_return_value_cur, R_NamesSymbol,R_return_value_names); UNPROTECT(2); SET_VECTOR_ELT(R_return_value,j,R_return_value_cur); } #endif UNPROTECT(1); return R_return_value; } #ifdef USE_PTHREADS static void *sub_rcModelSummarize_plm_group(void *data){ struct loop_data *args = 
#ifdef USE_PTHREADS
/* Worker thread: robust (PLM) fit for index-list elements
 * [start_row, end_row].  R allocations happen under mutex_R; the robust
 * regression itself runs unlocked on the already-allocated buffers. */
static void *sub_rcModelSummarize_plm_group(void *data){
  struct loop_data *args = (struct loop_data *) data;
  int *cur_rows;
  int i, j, k;
  int ncur_rows;
  SEXP R_return_value_cur;
  SEXP R_weights;
  SEXP R_residuals;
  SEXP R_beta;
  SEXP R_SE;
  SEXP R_scale;
  SEXP R_return_value_names;
  double *Ymat;
  double *beta;
  double *residuals;
  double *weights;
  double *se;
  double *scaleptr;
  double residSE;
  int cols = args->cols;

  for (j = args->start_row; j <= args->end_row; j++){
    ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList), j));
    cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList), j));

    pthread_mutex_lock(&mutex_R);
    PROTECT(R_return_value_cur = allocVector(VECSXP, 5));
    PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols));
    PROTECT(R_weights = allocMatrix(REALSXP, ncur_rows, cols));
    PROTECT(R_residuals = allocMatrix(REALSXP, ncur_rows, cols));
    PROTECT(R_SE = allocVector(REALSXP, ncur_rows + cols));
    PROTECT(R_scale = allocVector(REALSXP, 1));

    beta = NUMERIC_POINTER(R_beta);
    residuals = NUMERIC_POINTER(R_residuals);
    weights = NUMERIC_POINTER(R_weights);
    se = NUMERIC_POINTER(R_SE);
    scaleptr = NUMERIC_POINTER(R_scale);

    SET_VECTOR_ELT(R_return_value_cur, 0, R_beta);
    SET_VECTOR_ELT(R_return_value_cur, 1, R_weights);
    SET_VECTOR_ELT(R_return_value_cur, 2, R_residuals);
    SET_VECTOR_ELT(R_return_value_cur, 3, R_SE);
    SET_VECTOR_ELT(R_return_value_cur, 4, R_scale);
    UNPROTECT(5);  /* components are now reachable via the list */

    PROTECT(R_return_value_names = allocVector(STRSXP, 5));
    SET_STRING_ELT(R_return_value_names, 0, mkChar("Estimates"));
    SET_STRING_ELT(R_return_value_names, 1, mkChar("Weights"));
    SET_STRING_ELT(R_return_value_names, 2, mkChar("Residuals"));
    SET_STRING_ELT(R_return_value_names, 3, mkChar("StdErrors"));
    SET_STRING_ELT(R_return_value_names, 4, mkChar("Scale"));
    setAttrib(R_return_value_cur, R_NamesSymbol, R_return_value_names);
    UNPROTECT(1);

    SET_VECTOR_ELT(*(args->R_return_value), j, R_return_value_cur);
    UNPROTECT(1);
    pthread_mutex_unlock(&mutex_R);

    /* BUG FIX: scaleptr[0] was previously left uninitialized whenever
     * Scales was non-NULL with length(Scales) == cols.  Default to -1
     * (meaning "estimate the scale") before applying the override.
     * NOTE(review): the length(Scales) == cols case may originally have
     * been intended to pass per-column scales — confirm upstream. */
    scaleptr[0] = -1.0;
    if (!isNull(*args->Scales) && length(*args->Scales) != cols){
      scaleptr[0] = NUMERIC_POINTER(*args->Scales)[0];
    }

    /* Gather this group's rows into a dense ncur_rows x cols block. */
    Ymat = Calloc(ncur_rows*cols, double);
    for (k = 0; k < cols; k++){
      for (i = 0; i < ncur_rows; i++){
        Ymat[k*ncur_rows + i] = args->matrix[k*args->rows + cur_rows[i]];
      }
    }
    rlm_fit_anova_scale(Ymat, ncur_rows, cols, scaleptr, beta, residuals, weights,
                        PsiFunc(asInteger(*args->PsiCode)), asReal(*args->PsiK), 20, 0);
    rlm_compute_se_anova(Ymat, ncur_rows, cols, beta, residuals, weights, se,
                         (double *)NULL, &residSE, 4,
                         PsiFunc(asInteger(*args->PsiCode)), asReal(*args->PsiK));
    /* Impose the sum-to-zero constraint on the last probe effect. */
    beta[ncur_rows + cols - 1] = 0.0;
    for (i = cols; i < ncur_rows + cols - 1; i++)
      beta[ncur_rows + cols - 1] -= beta[i];
    Free(Ymat);
  }
  /* BUG FIX: missing return in a void* thread routine (pthread_join reads
   * the value; falling off the end was undefined behavior). */
  return NULL;
}
#endif

/* .Call() interface: for each element of R_rowIndexList, fit a robust
 * row+column (PLM) model to the named subset of rows of RMatrix.
 *
 * RMatrix        - numeric matrix (column-major, rows x cols)
 * R_rowIndexList - list of INTSXP vectors of row indices
 * PsiCode        - integer selecting the psi (influence) function
 * PsiK           - tuning constant for the psi function
 * Scales         - NULL to estimate the scale, otherwise input scale(s)
 *
 * Returns a list, one element per index vector, each a named list with
 * "Estimates", "Weights", "Residuals", "StdErrors" and "Scale".  When
 * compiled with USE_PTHREADS the work is split across up to R_THREADS
 * threads. */
SEXP R_sub_rcModelSummarize_plm(SEXP RMatrix, SEXP R_rowIndexList, SEXP PsiCode, SEXP PsiK, SEXP Scales){
  SEXP R_return_value;
  SEXP dim1;
  double *matrix = NUMERIC_POINTER(RMatrix);
  int rows, cols;
  int length_rowIndexList = LENGTH(R_rowIndexList);
  int i, j;
#ifdef USE_PTHREADS
  int t, returnCode, chunk_size, num_threads = 1;
  double chunk_size_d, chunk_tot_d;
  char *nthreads;
  pthread_attr_t attr;
  pthread_t *threads;
  struct loop_data *args;
  void *status;
#ifdef PTHREAD_STACK_MIN
  size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
#else
  size_t stacksize = 0x8000;
#endif
#else
  /* Serial-build-only locals. */
  int *cur_rows;
  int ncur_rows;
  SEXP R_return_value_cur;
  SEXP R_weights;
  SEXP R_residuals;
  SEXP R_beta;
  SEXP R_SE;
  SEXP R_scale;
  SEXP R_return_value_names;
  double *Ymat;
  double *beta;
  double *residuals;
  double *weights;
  double *se;
  double *scaleptr;
  double residSE;
  int k;
#endif

  PROTECT(dim1 = getAttrib(RMatrix, R_DimSymbol));
  rows = INTEGER(dim1)[0];
  cols = INTEGER(dim1)[1];
  UNPROTECT(1);

  PROTECT(R_return_value = allocVector(VECSXP, length_rowIndexList));

#ifdef USE_PTHREADS
  nthreads = getenv(THREADS_ENV_VAR);
  if (nthreads != NULL){
    num_threads = atoi(nthreads);
    if (num_threads <= 0){
      error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads);
    }
  }
  threads = (pthread_t *) Calloc(num_threads, pthread_t);

  /* Initialize and set thread detached attribute */
  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
  pthread_attr_setstacksize(&attr, stacksize);

  /* Divide the index list as evenly as possible across the threads.
   * Special case: fewer groups than threads -> one group per thread. */
  if (num_threads < length_rowIndexList){
    chunk_size = length_rowIndexList/num_threads;
    chunk_size_d = ((double) length_rowIndexList)/((double) num_threads);
  } else {
    chunk_size = 1;
    chunk_size_d = 1;
  }
  if (chunk_size == 0){
    chunk_size = 1;
  }
  args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data);
  args[0].matrix = matrix;
  args[0].R_return_value = &R_return_value;
  args[0].R_rowIndexList = &R_rowIndexList;
  args[0].PsiCode = &PsiCode;
  args[0].PsiK = &PsiK;
  args[0].Scales = &Scales;
  args[0].rows = rows;
  args[0].cols = cols;
  args[0].length_rowIndexList = length_rowIndexList;

  pthread_mutex_init(&mutex_R, NULL);

  t = 0;  /* t = number of threads actually doing work */
  chunk_tot_d = 0;
  for (i = 0; floor(chunk_tot_d + 0.00001) < length_rowIndexList; i += chunk_size){
    if (t != 0){
      memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data));
    }
    args[t].start_row = i;
    /* Distribute the remainder when #groups % #threads != 0; the 0.00001
     * guards against rounding in the floating-point division. */
    chunk_tot_d += chunk_size_d;
    if (i + chunk_size < floor(chunk_tot_d + 0.00001)){
      args[t].end_row = i + chunk_size;
      i++;
    } else {
      args[t].end_row = i + chunk_size - 1;
    }
    t++;
  }

  for (i = 0; i < t; i++){
    returnCode = pthread_create(&threads[i], &attr, sub_rcModelSummarize_plm_group, (void *) &(args[i]));
    if (returnCode){
      error("ERROR; return code from pthread_create() is %d\n", returnCode);
    }
  }
  /* Wait for the other threads */
  for (i = 0; i < t; i++){
    returnCode = pthread_join(threads[i], &status);
    if (returnCode){
      error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status));
    }
  }
  pthread_attr_destroy(&attr);
  pthread_mutex_destroy(&mutex_R);
  Free(threads);
  Free(args);
#else
  for (j = 0; j < length_rowIndexList; j++){
    ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList, j));
    cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList, j));

    PROTECT(R_return_value_cur = allocVector(VECSXP, 5));
    PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols));
    PROTECT(R_weights = allocMatrix(REALSXP, ncur_rows, cols));
    PROTECT(R_residuals = allocMatrix(REALSXP, ncur_rows, cols));
    PROTECT(R_SE = allocVector(REALSXP, ncur_rows + cols));
    PROTECT(R_scale = allocVector(REALSXP, 1));

    SET_VECTOR_ELT(R_return_value_cur, 0, R_beta);
    SET_VECTOR_ELT(R_return_value_cur, 1, R_weights);
    SET_VECTOR_ELT(R_return_value_cur, 2, R_residuals);
    SET_VECTOR_ELT(R_return_value_cur, 3, R_SE);
    SET_VECTOR_ELT(R_return_value_cur, 4, R_scale);
    UNPROTECT(5);

    beta = NUMERIC_POINTER(R_beta);
    residuals = NUMERIC_POINTER(R_residuals);
    weights = NUMERIC_POINTER(R_weights);
    se = NUMERIC_POINTER(R_SE);
    scaleptr = NUMERIC_POINTER(R_scale);

    /* BUG FIX: scaleptr[0] was previously left uninitialized whenever
     * Scales was non-NULL with length(Scales) == cols.  Default to -1
     * (meaning "estimate the scale") before applying the override. */
    scaleptr[0] = -1.0;
    if (!isNull(Scales) && length(Scales) != cols){
      scaleptr[0] = NUMERIC_POINTER(Scales)[0];
    }

    Ymat = Calloc(ncur_rows*cols, double);
    for (k = 0; k < cols; k++){
      for (i = 0; i < ncur_rows; i++){
        Ymat[k*ncur_rows + i] = matrix[k*rows + cur_rows[i]];
      }
    }
    rlm_fit_anova_scale(Ymat, ncur_rows, cols, scaleptr, beta, residuals, weights,
                        PsiFunc(asInteger(PsiCode)), asReal(PsiK), 20, 0);
    rlm_compute_se_anova(Ymat, ncur_rows, cols, beta, residuals, weights, se,
                         (double *)NULL, &residSE, 4,
                         PsiFunc(asInteger(PsiCode)), asReal(PsiK));
    /* Impose the sum-to-zero constraint on the last probe effect. */
    beta[ncur_rows + cols - 1] = 0.0;
    for (i = cols; i < ncur_rows + cols - 1; i++)
      beta[ncur_rows + cols - 1] -= beta[i];
    Free(Ymat);

    PROTECT(R_return_value_names = allocVector(STRSXP, 5));
    SET_STRING_ELT(R_return_value_names, 0, mkChar("Estimates"));
    SET_STRING_ELT(R_return_value_names, 1, mkChar("Weights"));
    SET_STRING_ELT(R_return_value_names, 2, mkChar("Residuals"));
    SET_STRING_ELT(R_return_value_names, 3, mkChar("StdErrors"));
    SET_STRING_ELT(R_return_value_names, 4, mkChar("Scale"));
    setAttrib(R_return_value_cur, R_NamesSymbol, R_return_value_names);
    UNPROTECT(2);
    SET_VECTOR_ELT(R_return_value, j, R_return_value_cur);
  }
#endif
  UNPROTECT(1);
  return R_return_value;
}
** ***************************************************************************/ static double Avg(double *x, int length){ int i; double sum = 0.0; double mean = 0.0; for (i=0; i < length; i++){ sum = sum + x[i]; } mean = sum/(double)length; return (mean); } /*************************************************************************** ** ** static double AvgLogSE(double *x, int length) ** ** double *x - a vector of PM intensities (previously log2 transformed) ** double mean - the mean of x computed using AvgLog above ** int length - length of *x ** ** compute the standard error of the average of log2 PM intensities. ** ** ***************************************************************************/ static double AvgSE(double *x, double mean, int length){ int i; double sum = 0.0; for (i=0; i < length; i++){ sum = sum + (x[i]- mean)*(x[i] - mean); } sum = sqrt(sum/(double)(length -1)); sum = sum/sqrt((double)length); return (sum); } void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; for (j = 0; j < cols; j++){ results[j] = Avg(&data[j*rows],rows); resultsSE[j] = AvgSE(&data[j*rows],results[j],rows); } } /*************************************************************************** ** ** void average(double *data, int rows, int cols, double *results, double *resultsSE) ** ** aim: given a data matrix of probe intensities, compute averages in column wise manner ** ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. 
** ***************************************************************************/ void colaverage(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; double *z = Calloc(rows,double); for (j = 0; j < cols; j++){ for (i =0; i < rows; i++){ z[i] = data[j*rows + i]; } results[j] = Avg(z,rows); resultsSE[j] = AvgSE(z,results[j],rows); } Free(z); } /*************************************************************************** ** ** double Average(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute average log2 expression measure. ** Note that data is a probes by chips matrix. ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. ** ***************************************************************************/ /*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute average log2 expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. 
Should be of length cols * * */ void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; } } for (j=0; j < cols; j++){ results[j] = Avg(&z[j*nprobes],nprobes); resultsSE[j] = AvgSE(&z[j*nprobes],results[j],nprobes); } Free(z); } /*************************************************************************** ** ** double AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute average log2 expression measure. ** Note that data is a probes by chips matrix. ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. ** ***************************************************************************/ /*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute average log2 expression measure. * Note that data is a probes by chips matrix. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes number of probes in current set * * */ void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; } } for (j=0; j < cols; j++){ results[j] = Avg(&z[j*nprobes],nprobes); } Free(z); } preprocessCore/src/avg.h0000644000126300012640000000066412127220006016646 0ustar00biocbuildphs_compbio#ifndef AVG_H #define AVG_H void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); void colaverage(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/src/avg_log.c0000644000126300012640000002000312127220006017467 0ustar00biocbuildphs_compbio/************************************************************************ ** ** avg_log.c ** ** created by: B. M. Bolstad ** created on: Jan 7, 2002 (but based on earlier work from Nov 2002) ** ** Copyright (C) 2002-2007 Ben Bolstad ** ** last modified: Jan 7, 2003 ** ** License: LGPL V2 (same as the rest of the preprocessCore package) ** ** General discussion ** ** Implement avgerage log2 pm summarization, with or without normalization ** ** Nov 2, 2002 - modify so that it will work efficently with affy2 ** Jan 3, 2003 - Clean up commenting, prepare for integration in AffyExtensions ** Jan 7, 2003 - Make function standalone, to prepare for later combination into ** a more general framework. ** Jul 23, 2003 - add parameter for computing SE and SE implemented ** Oct 5, 2003 - add output_param ** Oct 10, 2003 - added threestepPLM version of this summary. 
** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation ** May 26, 2007 - fix memory leak in average_log. add additional interfaces ** Sep 16, 2007 - fix error in how StdError is computed ** ************************************************************************/ #include "avg_log.h" #include #include #include #include #include #include #include /*************************************************************************** ** ** double AvgLog(double *x, int length) ** ** double *x - a vector of PM intensities (previously log2 transformed) ** int length - length of *x ** ** take the average of log2 PM intensities. ** ***************************************************************************/ static double AvgLog(double *x, int length){ int i; double sum = 0.0; double mean = 0.0; for (i=0; i < length; i++){ sum = sum + x[i]; } mean = sum/(double)length; return (mean); } /*************************************************************************** ** ** static double AvgLogSE(double *x, int length) ** ** double *x - a vector of PM intensities (previously log2 transformed) ** double mean - the mean of x computed using AvgLog above ** int length - length of *x ** ** compute the standard error of the average of log2 PM intensities. 
** ** ***************************************************************************/ static double AvgLogSE(double *x, double mean, int length){ int i; double sum = 0.0; for (i=0; i < length; i++){ sum = sum + (x[i]- mean)*(x[i] - mean); } sum = sqrt(sum/(double)(length-1)); sum = sum/sqrt((double)length); return (sum); } void averagelog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; for (j = 0; j < cols; j++){ for (i =0; i < rows; i++){ data[j*rows + i] = log(data[j*rows + i])/log(2.0); } results[j] = AvgLog(&data[j*rows],rows); resultsSE[j] = AvgLogSE(&data[j*rows],results[j],rows); } } /*************************************************************************** ** ** void averagelog(double *data, int rows, int cols, double *results, double *resultsSE) ** ** aim: given a data matrix of probe intensities, compute average of log2 values in column wise manner ** ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. 
** ***************************************************************************/ void averagelog(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; double *z = Calloc(rows,double); for (j = 0; j < cols; j++){ for (i =0; i < rows; i++){ z[i] = log(data[j*rows + i])/log(2.0); } results[j] = AvgLog(z,rows); resultsSE[j] = AvgLogSE(z,results[j],rows); } Free(z); } /*************************************************************************** ** ** double AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute average log2 expression measure. ** Note that data is a probes by chips matrix. ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. ** ***************************************************************************/ /*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute average log2 expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. 
Should be of length cols * * */ void AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = log(data[j*rows + cur_rows[i]])/log(2.0); } } for (j=0; j < cols; j++){ results[j] = AvgLog(&z[j*nprobes],nprobes); resultsSE[j] = AvgLogSE(&z[j*nprobes],results[j],nprobes); } Free(z); } /*************************************************************************** ** ** double AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute average log2 expression measure. ** Note that data is a probes by chips matrix. ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. ** ***************************************************************************/ /*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute average log2 expression measure. * Note that data is a probes by chips matrix. * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. 
Should be of length cols * @param nprobes number of probes in current set * * */ void AverageLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = log(data[j*rows + cur_rows[i]])/log(2.0); } } for (j=0; j < cols; j++){ results[j] = AvgLog(&z[j*nprobes],nprobes); } Free(z); } preprocessCore/src/avg_log.h0000644000126300012640000000067412127220006017510 0ustar00biocbuildphs_compbio#ifndef AVG_LOG_H #define AVG_LOG_H void AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); void AverageLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); void averagelog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); void averagelog(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/src/biweight.c0000644000126300012640000002057312127220006017667 0ustar00biocbuildphs_compbio/************************************************************************ ** ** file: biweight.c ** ** Copyright (C) 2002-2007 Ben Bolstad ** ** aim: implement the tukey biweight - one step method of summarizing a probeset ** ** created by: B. M. Bolstad ** created on: Jan 7, 2003 (But based on a file mas5.c created in Nov 2002) ** ** last modified: Jan 7, 2003 ** ** License: LGPL V2 (same as the rest of the preprocessCore package) ** ** General discussion ** ** Implement Tukey Biweight Summarization method. ** ** ** Nov, 2002 - Initial versions ** Jan 2, 2003 - Clean up commenting, prepare for integration into AffyExtensions version 0.4 ** Jan 7, 2003 - make the code a standalone file, data structure manipulation will be handled ** elsewhere. 
** Jul 23, 2003 - SE parameter added and implemented ** Oct 10, 2003 - added in PLM version ** Apr 5, 2004 - Change mallocs to Callocs ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation ** Sep 16, 2007 - fix bug in tukeybiweight ** Sep 19, 2007 - add TukeyBiweight_noSE ** ************************************************************************/ /*#include "threestep_common.h" */ #include "biweight.h" #include "rma_common.h" #include #include #include #include #include #include #include /****************************************************************************** ** ** double weight_bisquare(double x) ** ** computes bisquare weights ** ** double x - data ** ** returns bisquare weight ** *******************************************************************************/ static double weight_bisquare(double x){ if (fabs(x) <= 1.0){ return (1-x*x)*(1-x*x); } else { return 0; } } /**************************************************************************** ** ** double Tukey_Biweight(double *x, int length) ** ** implements one step Tukey's Biweight as documented in the Affymetrix ** Statistical Algorithms Description Document. 
**
** double *x - vector of data
** int length - length of *x
**
****************************************************************************/

/* One-step Tukey biweight location estimate: standardize each point by the
   median and (unscaled) MAD, then return the bisquare-weighted mean. */
double Tukey_Biweight(double *x, int length){
  const double c = 5.0;          /* tuning constant from the Affymetrix algorithm */
  const double epsilon = 0.0001; /* guards against division by a zero MAD */
  double *scratch = (double *)Calloc(length,double);
  double center;                 /* median of x */
  double spread;                 /* median absolute deviation from the median */
  double wsum = 0.0;             /* bisquare-weighted sum of the data */
  double wtotal = 0.0;           /* sum of the bisquare weights */
  int k;

  /* median of the data */
  for (k = 0; k < length; k++){
    scratch[k] = x[k];
  }
  qsort(scratch,length,sizeof(double),(int(*)(const void*, const void*))sort_double);
  if (length%2 == 0){
    center = (scratch[length/2 -1] + scratch[length/2])/2.0;
  } else {
    center = scratch[length/2];
  }

  /* median of the absolute deviations from that median */
  for (k = 0; k < length; k++){
    scratch[k] = fabs(x[k] - center);
  }
  qsort(scratch,length,sizeof(double),(int(*)(const void*, const void*))sort_double);
  if (length%2 == 0){
    spread = (scratch[length/2 -1] + scratch[length/2])/2.0;
  } else {
    spread = scratch[length/2];
  }

  /* standardized distances, then the one-step weighted mean */
  for (k = 0; k < length; k++){
    scratch[k] = (x[k] - center)/(c*spread + epsilon);
  }
  for (k = 0; k < length; k++){
    wsum += weight_bisquare(scratch[k])*x[k];
    wtotal += weight_bisquare(scratch[k]);
  }

  Free(scratch);

  return(wsum/wtotal);
}

/****************************************************************************
**
** double Tukey_Biweight_SE(double *x, double BW, int length)
**
** implements one step Tukey's Biweight SE as documented in the Affymetrix
** Statistical Algorithms Description Document.
** ** double *x - vector of data ** int length - length of *x ** ****************************************************************************/ static double Tukey_Biweight_SE(double *x,double BW, int length){ double median; int i; double *buffer = (double *)Calloc(length,double); double c = 5.0; double epsilon = 0.0001; double S; double sum = 0.0; double sumw = 0.0; for (i=0; i < length; i++){ buffer[i] = x[i]; } qsort(buffer,length,sizeof(double),(int(*)(const void*, const void*))sort_double); if (length%2 == 0){ median = (buffer[length/2 -1] + buffer[length/2])/2.0; } else { median = buffer[length/2]; } /* printf("%f \n",median); */ for (i=0; i < length; i++){ buffer[i] = fabs(x[i] - median); } qsort(buffer,length,sizeof(double),(int(*)(const void*, const void*))sort_double); if (length%2 == 0){ S = (buffer[length/2 -1] + buffer[length/2])/2.0; } else { S = buffer[length/2]; } /* printf("%f \n",S); */ for (i=0; i < length; i++){ buffer[i] = (x[i] - median)/(c*S + epsilon); } for (i =0; i < length; i++){ sum+= weight_bisquare(buffer[i])*weight_bisquare(buffer[i])*(x[i]- BW)*(x[i] - BW); if (buffer[i] < 1.0){ sumw += (1.0-buffer[i]*buffer[i])*(1.0 - 5.0*buffer[i]*buffer[i]); } } Free(buffer); return(sqrt(sum)/fabs(sumw)); } void tukeybiweight(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; double *z = Calloc(rows,double); for (j = 0; j < cols; j++){ for (i =0; i < rows; i++){ z[i] = log(data[j*rows + i])/log(2.0); } results[j] = Tukey_Biweight(z,rows); resultsSE[j] = Tukey_Biweight_SE(z,results[j],rows); } Free(z); } void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; double *z = Calloc(rows,double); for (j = 0; j < cols; j++){ for (i =0; i < rows; i++){ z[i] = data[j*rows + i]; } results[j] = Tukey_Biweight(z,rows); resultsSE[j] = Tukey_Biweight_SE(z,results[j],rows); } Free(z); } /********************************************************************************** ** ** void 
tukeybiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
**
** aim: given a data matrix of probe intensities, and a list of rows in the matrix
**      corresponding to a single probeset, compute tukey biweight expression measure.
**      Note that data is a probes by chips matrix; apply Tukey's biweight to columns
**
** double *data - Probe intensity matrix
** int rows - number of rows in matrix *data (probes)
** int cols - number of cols in matrix *data (chips)
** int *cur_rows - indices of rows corresponding to current probeset
** double *results - already allocated location to store expression measures (cols length)
** int nprobes - number of probes in current probeset.
**
***********************************************************************************/

/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix
 *      corresponding to a single probeset, compute tukey biweight expression measure.
 *      Note that data is a probes by chips matrix. Also compute SE estimates
 *
 * @param data a matrix containing data stored column-wise in rows*cols length of memory
 * @param rows the number of rows in the matrix
 * @param cols the number of columns in the matrix
 * @param cur_rows a vector containing row indices to use
 * @param results pre-allocated space to store output log2 averages. Should be of length cols
 * @param nprobes number of probes in current set
 * @param resultsSE pre-allocated space to store SE of log2 averages.
Should be of length cols
 *
 *
 */

/* One-step Tukey biweight summary of a probeset, per chip, on the log2
   scale; also fills in the SE for each chip. */
void TukeyBiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){
  int i,j;
  double *z = Calloc(nprobes,double);

  for (j = 0; j < cols; j++){
    /* log2 transform the probeset rows for chip j */
    for (i =0; i < nprobes; i++){
      z[i] = log(data[j*rows + cur_rows[i]])/log(2.0);
    }
    results[j] = Tukey_Biweight(z,nprobes);
    resultsSE[j] = Tukey_Biweight_SE(z,results[j],nprobes);
  }
  Free(z);
}

/* As TukeyBiweight, but without computing any SE. */
void TukeyBiweight_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){
  int i,j;
  double *z = Calloc(nprobes,double);

  for (j = 0; j < cols; j++){
    for (i =0; i < nprobes; i++){
      z[i] = log(data[j*rows + cur_rows[i]])/log(2.0);
    }
    results[j] = Tukey_Biweight(z,nprobes);
  }
  Free(z);
}

/* As TukeyBiweight_noSE, but on the raw (unlogged) scale. */
void TukeyBiweight_no_log_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){
  int i,j;
  double *z = Calloc(nprobes,double);

  for (j = 0; j < cols; j++){
    for (i =0; i < nprobes; i++){
      z[i] = data[j*rows + cur_rows[i]];
    }
    results[j] = Tukey_Biweight(z,nprobes);
  }
  Free(z);
}
preprocessCore/src/biweight.h0000644000126300012640000000115012127220006017666 0ustar00biocbuildphs_compbio#ifndef BIWEIGHT_H
#define BIWEIGHT_H 1

void tukeybiweight(double *data, int rows, int cols, double *results, double *resultsSE);
void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE);
void TukeyBiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void TukeyBiweight_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
void TukeyBiweight_no_log_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
double Tukey_Biweight(double *x, int length);

#endif
/* Fortran LINPACK-style weighted least squares via QR (see dqrls.f in R). */
preprocessCore/src/dqrls.h0000644000126300012640000000024512127220006017211 0ustar00biocbuildphs_compbioextern int dqrls_(double *x, int *n, int *p, double *y, int *ny, double *tol, double *b, double *rsd, double *qty, int *k, int *jpvt, double
*qraux, double *work); preprocessCore/src/init_package.c0000644000126300012640000002502712127220006020502 0ustar00biocbuildphs_compbio/***************************************************** ** ** file: init_package.c ** ** Copyright (C) 2007-2008 B. M. Bolstad ** ** aim: Register c code routines so that they can be called in other packages. **" ** History ** May 20, 2007 - Initial version ** May 24-27, 2007 - add in additional registered functions ** Sep 9, 2007 - add the R_rlm_rma_default and R_wrlm_rma_default_model as registered functions ** Sep 10, 2007 - add logmedian medianlog dunctions ** Mar 11, 2007 - add R_rlm_rma_given_probe_effects etc functions ** *****************************************************/ #include "qnorm.h" #include "medianpolish.h" #include "log_avg.h" #include "avg_log.h" #include "avg.h" #include "median_log.h" #include "log_median.h" #include "median.h" #include "biweight.h" #include "lm.h" #include "rlm.h" #include "rlm_se.h" #include "R_rlm_interfaces.h" #include "R_colSummarize.h" #include "R_subColSummarize.h" #include "plmr.h" #include "R_plmr_interfaces.h" #include "rma_background4.h" #include "weightedkerneldensity.h" #include #include #include #if _MSC_VER >= 1000 __declspec(dllexport) #endif static const R_CallMethodDef callMethods[] = { {"R_qnorm_c",(DL_FUNC)&R_qnorm_c,2}, {"R_qnorm_robust_weights", (DL_FUNC)&R_qnorm_robust_weights, 3}, {"R_qnorm_robust_c",(DL_FUNC)&R_qnorm_robust_c,6}, {"R_qnorm_determine_target",(DL_FUNC)&R_qnorm_determine_target,2}, {"R_qnorm_using_target",(DL_FUNC)&R_qnorm_using_target,3}, {"R_qnorm_within_blocks",(DL_FUNC)&R_qnorm_within_blocks,3}, {"R_rlm_rma_default_model",(DL_FUNC)&R_rlm_rma_default_model,4}, {"R_wrlm_rma_default_model", (DL_FUNC)&R_wrlm_rma_default_model,5}, {"R_medianpolish_rma_default_model", (DL_FUNC)&R_medianpolish_rma_default_model,1}, {"R_colSummarize_avg_log", (DL_FUNC)&R_colSummarize_avg_log,1}, {"R_colSummarize_log_avg", (DL_FUNC)&R_colSummarize_log_avg,1}, 
{"R_colSummarize_median_log", (DL_FUNC)&R_colSummarize_median_log,1}, {"R_colSummarize_log_median", (DL_FUNC)&R_colSummarize_log_median,1}, {"R_colSummarize_biweight_log", (DL_FUNC)&R_colSummarize_biweight_log,1}, {"R_colSummarize_medianpolish_log",(DL_FUNC)&R_colSummarize_medianpolish_log,1}, {"R_colSummarize_avg",(DL_FUNC)&R_colSummarize_avg,1}, {"R_colSummarize_median",(DL_FUNC)&R_colSummarize_median,1}, {"R_colSummarize_biweight", (DL_FUNC)&R_colSummarize_biweight,1}, {"R_colSummarize_medianpolish",(DL_FUNC)&R_colSummarize_medianpolish,1}, {"R_subColSummarize_avg_log", (DL_FUNC)&R_subColSummarize_avg_log,2}, {"R_subColSummarize_log_avg", (DL_FUNC)&R_subColSummarize_log_avg,2}, {"R_subColSummarize_avg", (DL_FUNC)&R_subColSummarize_avg,2}, {"R_subColSummarize_biweight_log", (DL_FUNC)&R_subColSummarize_biweight_log,2}, {"R_subColSummarize_biweight", (DL_FUNC)&R_subColSummarize_biweight,2}, {"R_subColSummarize_median_log", (DL_FUNC)&R_subColSummarize_median_log,2}, {"R_subColSummarize_log_median", (DL_FUNC)&R_subColSummarize_log_median,2}, {"R_subColSummarize_median",(DL_FUNC)&R_subColSummarize_median,2}, {"R_subColSummarize_medianpolish_log",(DL_FUNC)&R_subColSummarize_medianpolish_log,2}, {"R_subColSummarize_medianpolish",(DL_FUNC)&R_subColSummarize_medianpolish,2}, {"R_plmr_model",(DL_FUNC)&R_plmr_model,3}, {"R_wplmr_model", (DL_FUNC)&R_wplmr_model,4}, {"R_plmrr_model",(DL_FUNC)&R_plmrr_model,3}, {"R_wplmrr_model", (DL_FUNC)&R_wplmrr_model,4}, {"R_plmrc_model",(DL_FUNC)&R_plmrc_model,3}, {"R_wplmrc_model", (DL_FUNC)&R_wplmrc_model,4}, {"R_rlm_rma_given_probe_effects", (DL_FUNC)&R_rlm_rma_given_probe_effects,5}, {"R_wrlm_rma_given_probe_effects", (DL_FUNC)&R_wrlm_rma_given_probe_effects,6}, {"R_rma_bg_correct",(DL_FUNC)&R_rma_bg_correct,2}, {NULL, NULL, 0} }; void R_init_preprocessCore(DllInfo *info){ R_registerRoutines(info, NULL, callMethods, NULL, NULL); /* The normalization routines */ R_RegisterCCallable("preprocessCore", "qnorm_c", (DL_FUNC)&qnorm_c); 
R_RegisterCCallable("preprocessCore", "R_qnorm_robust_weights", (DL_FUNC)&R_qnorm_robust_weights); R_RegisterCCallable("preprocessCore", "qnorm_robust_c", (DL_FUNC)&qnorm_robust_c); R_RegisterCCallable("preprocessCore", "qnorm_c_using_target", (DL_FUNC)&qnorm_c_using_target); R_RegisterCCallable("preprocessCore", "qnorm_c_determine_target", (DL_FUNC)&qnorm_c_determine_target); R_RegisterCCallable("preprocessCore", "qnorm_c_within_blocks", (DL_FUNC)&qnorm_c_within_blocks); /* The summarization routines */ R_RegisterCCallable("preprocessCore", "median_polish_fit_no_copy", (DL_FUNC)&median_polish_fit_no_copy); R_RegisterCCallable("preprocessCore", "median_polish_no_copy", (DL_FUNC)&median_polish_no_copy); R_RegisterCCallable("preprocessCore", "median_polish_log2_no_copy", (DL_FUNC)&median_polish_log2_no_copy); R_RegisterCCallable("preprocessCore", "median_polish_log2", (DL_FUNC)&median_polish_log2); R_RegisterCCallable("preprocessCore", "median_polish", (DL_FUNC)&median_polish); R_RegisterCCallable("preprocessCore", "MedianPolish", (DL_FUNC)&MedianPolish); R_RegisterCCallable("preprocessCore", "MedianPolish_no_log", (DL_FUNC)&MedianPolish_no_log); R_RegisterCCallable("preprocessCore","AverageLog", (DL_FUNC)&AverageLog); R_RegisterCCallable("preprocessCore","averagelog_no_copy", (DL_FUNC)&averagelog_no_copy); R_RegisterCCallable("preprocessCore","averagelog", (DL_FUNC)&averagelog); R_RegisterCCallable("preprocessCore","AverageLog_noSE", (DL_FUNC)&AverageLog_noSE); R_RegisterCCallable("preprocessCore","ColAverage", (DL_FUNC)&ColAverage); R_RegisterCCallable("preprocessCore","colaverage_no_copy", (DL_FUNC)&colaverage_no_copy); R_RegisterCCallable("preprocessCore","colaverage", (DL_FUNC)&colaverage); R_RegisterCCallable("preprocessCore","ColAverage_noSE", (DL_FUNC)&ColAverage_noSE); R_RegisterCCallable("preprocessCore","MedianLog", (DL_FUNC)&MedianLog); R_RegisterCCallable("preprocessCore","medianlog_no_copy", (DL_FUNC)&medianlog_no_copy); 
R_RegisterCCallable("preprocessCore","medianlog", (DL_FUNC)&medianlog); R_RegisterCCallable("preprocessCore","MedianLog_noSE", (DL_FUNC)&MedianLog_noSE); R_RegisterCCallable("preprocessCore","LogMedian", (DL_FUNC)&LogMedian); R_RegisterCCallable("preprocessCore","logmedian_no_copy", (DL_FUNC)&logmedian_no_copy); R_RegisterCCallable("preprocessCore","logmedian", (DL_FUNC)&logmedian); R_RegisterCCallable("preprocessCore","LogMedian_noSE", (DL_FUNC)&LogMedian_noSE); R_RegisterCCallable("preprocessCore","ColMedian", (DL_FUNC)&ColMedian); R_RegisterCCallable("preprocessCore","colmedian_no_copy", (DL_FUNC)&colmedian_no_copy); R_RegisterCCallable("preprocessCore","colmedian", (DL_FUNC)&colmedian); R_RegisterCCallable("preprocessCore","ColMedian_noSE", (DL_FUNC)&ColMedian_noSE); R_RegisterCCallable("preprocessCore","logaverage", (DL_FUNC)&logaverage); R_RegisterCCallable("preprocessCore","LogAverage", (DL_FUNC)&LogAverage); R_RegisterCCallable("preprocessCore","LogAverage_noSE", (DL_FUNC)&LogAverage_noSE); R_RegisterCCallable("preprocessCore","tukeybiweight", (DL_FUNC)&tukeybiweight); R_RegisterCCallable("preprocessCore","tukeybiweight_no_log", (DL_FUNC)&tukeybiweight_no_log); R_RegisterCCallable("preprocessCore","TukeyBiweight", (DL_FUNC)&TukeyBiweight); R_RegisterCCallable("preprocessCore","TukeyBiweight_noSE", (DL_FUNC)&TukeyBiweight_noSE); R_RegisterCCallable("preprocessCore","TukeyBiweight_no_log_noSE", (DL_FUNC)&TukeyBiweight_no_log_noSE); R_RegisterCCallable("preprocessCore","Tukey_Biweight", (DL_FUNC)&Tukey_Biweight); R_RegisterCCallable("preprocessCore","lm_wfit", (DL_FUNC)&lm_wfit); R_RegisterCCallable("preprocessCore","rlm_fit", (DL_FUNC)&rlm_fit); R_RegisterCCallable("preprocessCore","rlm_wfit", (DL_FUNC)&rlm_wfit); R_RegisterCCallable("preprocessCore","rlm_compute_se", (DL_FUNC)&rlm_compute_se); R_RegisterCCallable("preprocessCore", "med_abs", (DL_FUNC)&med_abs); /* The PLM functions */ R_RegisterCCallable("preprocessCore","rlm_fit_anova", 
(DL_FUNC)&rlm_fit_anova); R_RegisterCCallable("preprocessCore","rlm_wfit_anova", (DL_FUNC)&rlm_wfit_anova); R_RegisterCCallable("preprocessCore","rlm_compute_se_anova", (DL_FUNC)&rlm_compute_se_anova); /* The PLM-R functions */ R_RegisterCCallable("preprocessCore","plmr_fit", (DL_FUNC)&plmr_fit); R_RegisterCCallable("preprocessCore","plmr_wfit", (DL_FUNC)&plmr_wfit); R_RegisterCCallable("preprocessCore","plmrc_fit", (DL_FUNC)&plmrc_fit); R_RegisterCCallable("preprocessCore","plmrc_wfit", (DL_FUNC)&plmrc_wfit); R_RegisterCCallable("preprocessCore","plmrr_fit", (DL_FUNC)&plmrr_fit); R_RegisterCCallable("preprocessCore","plmrr_wfit", (DL_FUNC)&plmrr_wfit); /* The PLM functions that work with fixed row(probe) effects */ R_RegisterCCallable("preprocessCore","rlm_fit_anova_given_probe_effects", (DL_FUNC)&rlm_fit_anova_given_probe_effects); R_RegisterCCallable("preprocessCore","rlm_compute_se_anova_given_probe_effects", (DL_FUNC)&rlm_compute_se_anova_given_probe_effects); R_RegisterCCallable("preprocessCore","rlm_wfit_anova_given_probe_effects", (DL_FUNC)&rlm_wfit_anova_given_probe_effects); /* RMA background correction */ R_RegisterCCallable("preprocessCore","rma_bg_adjust", (DL_FUNC)&rma_bg_adjust); R_RegisterCCallable("preprocessCore","rma_bg_parameters", (DL_FUNC)&rma_bg_parameters); R_RegisterCCallable("preprocessCore","rma_bg_correct", (DL_FUNC)&rma_bg_correct); /* R_subColSummary functions */ R_RegisterCCallable("preprocessCore","R_subColSummarize_avg_log", (DL_FUNC)&R_subColSummarize_avg_log); R_RegisterCCallable("preprocessCore","R_subColSummarize_log_avg", (DL_FUNC)&R_subColSummarize_log_avg); R_RegisterCCallable("preprocessCore","R_subColSummarize_avg", (DL_FUNC)&R_subColSummarize_avg); R_RegisterCCallable("preprocessCore","R_subColSummarize_biweight_log", (DL_FUNC)&R_subColSummarize_biweight_log); R_RegisterCCallable("preprocessCore","R_subColSummarize_biweight", (DL_FUNC)&R_subColSummarize_biweight); 
R_RegisterCCallable("preprocessCore","R_subColSummarize_median_log", (DL_FUNC)&R_subColSummarize_median_log); R_RegisterCCallable("preprocessCore","R_subColSummarize_log_median", (DL_FUNC)&R_subColSummarize_log_median); R_RegisterCCallable("preprocessCore","R_subColSummarize_median",(DL_FUNC)&R_subColSummarize_median); R_RegisterCCallable("preprocessCore","R_subColSummarize_medianpolish_log",(DL_FUNC)&R_subColSummarize_medianpolish_log); R_RegisterCCallable("preprocessCore","R_subColSummarize_medianpolish",(DL_FUNC)&R_subColSummarize_medianpolish); /* KernelDensity */ R_RegisterCCallable("preprocessCore","KernelDensity", (DL_FUNC)&KernelDensity); } preprocessCore/src/lm.c0000644000126300012640000001573012127220006016474 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: lm.c ** ** Aim: implement linear models. ** ** Copyright (C) 2003 Ben Bolstad ** ** created by: B. M. Bolstad ** ** created on: Jun 05, 2003 ** ** Last modified: Jun 05, 2003 ** ** The aim will be to provide a functions for fitting linear models. ** ** ** History ** ** Jun 05, 2003 - Moved lm_wfit from rlm.c to this file lm.c ** modify lm_wfit to handle cases where weight is 0. ** otherwise bad things will happen when using a psi_fn that ** gives weights of 0. ** Jul 27, 2003 - better handling of cases where a parameter ** is not estimable (set it to NA) ** Sep 14, 2003 - fix a bug where k was where a j should be in a for loop ** Mar 1, 2006 - change all comments to ansi style ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. 
Add doxygen style documentation ** ********************************************************************/ #include #include #include #include #include "dqrls.h" #include "lm.h" /************************************************************************* ** ** void lm_wfit(double *x, double *y, double *w, int rows, int cols, double tol, double *outbeta, double *outresid) ** ** double *x - coefficient matrix: dimension rows*cols ** double *y - dependent variable: length rows ** double *w - weights for each observation: length rows ** int rows, cols - dimension of input ** double tol - machine tolerance used in qr decomp ** double *outbeta - place to output beta estimates: lenght cols ** double *outresid - place to output residuals: length rows ** ** This function computes weighted linear regression estimates using QR decomposition ** Note that a linpack routine is used to do the actual fit. ** ** For now we will assume that singularities will not exist when doing QR, but will fix later ** if becomes problematic. NB a printf comment is returned when matrix is of full rank ** but still no proper checking. ** *************************************************************************/ void lm_wfit(double *x, double *y, double *w, int rows, int cols, double tol, double *out_beta, double *out_resids){ int i,j; int ny = 1; int k; int numzero_weights = 0,nrows,totnumzero=0; double fittedvalue; double *wts = Calloc(rows,double); double *x_wts_f = Calloc(rows*cols,double); double *y_wts_f = Calloc(rows,double); double *beta = Calloc(cols,double); double *resid = Calloc(rows,double); double *qraux = Calloc(cols,double); double *qty = Calloc(rows,double); double *work = Calloc(2*cols,double); int *jpvt = Calloc(cols,int); for (i=0; i < rows; i++){ if (w[i] == 0.0){ totnumzero++; } } if (totnumzero > 0){ /* we need to chop up the X and Y matricies a little more then removing the observations that have weight = 0. In particular fit the model removing the weight 0 observations. 
then to compute the residuals for the weight 0 observations used fitted values and observed X to compute fitted values */ numzero_weights = 0; for (i=0; i < rows; i++){ if (w[i] > 0.0){ wts[i - numzero_weights] = sqrt(w[i]); y_wts_f[i - numzero_weights] = wts[i - numzero_weights]*y[i]; for (j = 0; j < cols; j++){ x_wts_f[j*(rows-totnumzero)+(i-numzero_weights)] = wts[i - numzero_weights]*x[j*rows+i]; } } else { numzero_weights++; } } for (j=0;j < cols; j++){ jpvt[j] = j; } nrows = rows - numzero_weights; /* now fit the model */ dqrls_(x_wts_f,&nrows,&cols,y_wts_f,&ny,&tol,beta,resid,qty,&k,jpvt,qraux,work); if (k != cols){ /* the solution is not of full rank */ /* printf("Improper fit\n"); */ for (j = 0; j < k; j++){ out_beta[j] = beta[jpvt[j]]; } for(j =k; j < cols; j++){ out_beta[jpvt[j]] = R_NaN; } } else { /* detangle beta and residual estimates */ for (j = 0; j < cols; j++){ out_beta[j] = beta[jpvt[j]]; } } /* now the model is fitted, lets compute residuals for the 0 weighted observations by first computing fitted values. */ numzero_weights = 0; for (i=0; i < rows; i++){ if (w[i] > 0){ out_resids[i] = resid[i- numzero_weights]/wts[i- numzero_weights]; } else { /* compute the fitted value */ numzero_weights++; fittedvalue = 0.0; for (j=0; j ** created on: Feb 6, 2003 (but based on earlier work from Nov 2002) ** ** last modified: Feb 6, 2003 ** ** License: LGPL V2 (same as the rest of the preprocessCore package) ** ** General discussion ** ** Implement log2 average pm summarization, with or without normalization ** ** Feb 6, 2002 - Initial version of this summarization method ** Jul 23, 2003 - parameter for storing SE added (not yet implemented) ** Oct 5, 2003 - method of adding parameters. ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. 
Add doxygen style documentation
** Sep 19, 2007 - add LogAverage_noSE
**
************************************************************************/

#include "log_avg.h"
#include "qnorm.h"

/* NOTE(review): the angle-bracket header names below were lost in extraction
** (rendered as bare "#include"); confirm against the upstream sources. */
#include #include #include #include #include #include #include

/***************************************************************************
**
** double LogAvg(double *x, int length)
**
** double *x - a vector of PM intensities
** int length - length of *x
**
** take the log2 of the average of PM intensities.
**
***************************************************************************/

/* log2 of the arithmetic mean of x[0..length-1] (log of mean, not mean of
   logs). */
static double LogAvg(double *x, int length){

  int i;
  double sum = 0.0;
  double mean = 0.0;

  for (i=0; i < length; i++){
    sum = sum + x[i];
  }

  mean = sum/(double)length;
  mean = log(mean)/log(2.0);

  return (mean);
}

/***************************************************************************
**
** void averagelog(double *data, int rows, int cols, double *results, double *resultsSE)
**
** aim: given a data matrix of probe intensities, compute average of log2 values in column wise manner
**
**
** double *data - Probe intensity matrix
** int rows - number of rows in matrix *data (probes)
** int cols - number of cols in matrix *data (chips)
** int *cur_rows - indices of rows corresponding to current probeset
** double *results - already allocated location to store expression measures (cols length)
** int nprobes - number of probes in current probeset.
**
***************************************************************************/

/* Column-wise log2-average summarization: for each column (chip) of the
** rows x cols matrix, compute log2(mean(column)) via LogAvg. No SE is
** defined for this summary, so every resultsSE entry is set to NA.
**
** FIX(review): the working buffer z was allocated with Calloc but never
** released, leaking rows*sizeof(double) per call; every sibling summary
** function in these files Free()s its buffer. */
void logaverage(double *data, int rows, int cols, double *results, double *resultsSE){

  int i,j;
  double *z = Calloc(rows,double);

  for (j = 0; j < cols; j++){
    /* copy column j on the raw scale; LogAvg takes the log of the mean */
    for (i =0; i < rows; i++){
      z[i] = data[j*rows + i];
    }
    results[j] = LogAvg(z,rows);
    resultsSE[j] = R_NaReal;
  }

  Free(z);   /* was missing: memory leak */
}

/***************************************************************************
**
** double LogAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
**
** aim: given a data matrix of probe intensities, and a list of rows in the matrix
**      corresponding to a single probeset, compute average log2 expression measure.
**      Note that data is a probes by chips matrix.
**
** double *data - Probe intensity matrix
** int rows - number of rows in matrix *data (probes)
** int cols - number of cols in matrix *data (chips)
** int *cur_rows - indicies of rows corresponding to current probeset
** double *results - already allocated location to store expression measures (cols length)
** int nprobes - number of probes in current probeset.
**
***************************************************************************/

/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix
 *      corresponding to a single probeset, compute log2 average expression measure.
 *      Note that data is a probes by chips matrix. Also compute SE estimates
 *
 * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
 * @param rows the number of rows in the matrix
 * @param cols the number of columns in the matrix
 * @param cur_rows a vector containing row indices to use
 * @param results pre-allocated space to store output log2 averages. Should be of length cols
 * @param nprobes number of probes in current set
 * @param resultsSE pre-allocated space to store SE of log2 averages.
Should be of length cols
 *
 *
 */

/* Per-chip log2(average) of the probeset rows. The data are averaged on the
   raw scale first and then log2 transformed (log of mean, not mean of logs).
   No SE is defined for this summary, so resultsSE is filled with NA. */
void LogAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){

  int i,j;
  double *z = Calloc(nprobes*cols,double);

  /* copy the probeset rows, chip-major, on the raw scale */
  for (j = 0; j < cols; j++){
    for (i =0; i < nprobes; i++){
      z[j*nprobes + i] = data[j*rows + cur_rows[i]];
    }
  }

  for (j=0; j < cols; j++){
    results[j] = LogAvg(&z[j*nprobes],nprobes);
    resultsSE[j] = R_NaReal;
  }
  Free(z);
}

/* As LogAverage, but without touching any SE output. */
void LogAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){

  int i,j;
  double *z = Calloc(nprobes*cols,double);

  for (j = 0; j < cols; j++){
    for (i =0; i < nprobes; i++){
      z[j*nprobes + i] = data[j*rows + cur_rows[i]];
    }
  }

  for (j=0; j < cols; j++){
    results[j] = LogAvg(&z[j*nprobes],nprobes);
  }
  Free(z);
}
preprocessCore/src/log_avg.h0000644000126300012640000000053612127220006017505 0ustar00biocbuildphs_compbio#ifndef LOG_AVG_H
#define LOG_AVG_H 1

void logaverage(double *data, int rows, int cols, double *results, double *resultsSE);
void LogAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void LogAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);

#endif
preprocessCore/src/log_median.c0000644000126300012640000000744712127220006020170 0ustar00biocbuildphs_compbio/************************************************************************
**
** log_median.c (was previously medianPM.c)
**
** Copyright (C) 2002-2003 Ben Bolstad
**
** created by: B. M. Bolstad
** created on: Feb 6, 2003 (but based on earlier work from Nov 2002)
**
** last modified: Feb 6, 2003
**
** License: LGPL V2 (same as the rest of the preprocessCore package)
**
** General discussion
**
** Implement log2 median pm summarization.
**
** Feb 6, 2003 - Initial version of this summarization method
** Feb 24, 2003 - remove unused int i from LogMedian()
** Jul 23, 2003 - add a parameter for storing SE (not yet implemented)
** Oct 10, 2003 - PLM version of threestep
** Sep 10, 2007 - move functionality out of affyPLM (and into preprocessCore)
** Sep 19, 2007 - add LogMedian_noSE
**
************************************************************************/

#include "log_median.h"
#include "rma_common.h"

/* NOTE(review): the angle-bracket header names below were lost in extraction
** (rendered as bare "#include"); confirm against the upstream sources. */
#include #include #include #include #include #include #include

/***************************************************************************
**
** double LogMedian(double *x, int length)
**
** double *x - a vector of PM intensities
** int length - length of *x
**
** take the log2 of the median of PM intensities.
**
***************************************************************************/

/* log2 of the median of x[0..length-1]; median() is the helper declared in
   rma_common.h. NOTE(review): whether median() reorders x in place is not
   visible here -- confirm in rma_common before passing live data. */
static double log_median(double *x, int length){

  double med = 0.0;

  med = median(x,length);
  med = log(med)/log(2.0);

  return (med);
}

/***************************************************************************
**
** double LogMedianPM(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
**
** aim: given a data matrix of probe intensities, and a list of rows in the matrix
**      corresponding to a single probeset, compute log2 Median expression measure.
**      Note that data is a probes by chips matrix.
**
** double *data - Probe intensity matrix
** int rows - number of rows in matrix *data (probes)
** int cols - number of cols in matrix *data (chips)
** int *cur_rows - indices of rows corresponding to current probeset
** double *results - already allocated location to store expression measures (cols length)
** int nprobes - number of probes in current probeset.
**
***************************************************************************/

/* Per-chip log2(median) of the probeset rows; no SE is defined for this
   summary, so resultsSE is filled with NA. */
void LogMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){

  int i,j;
  double *z = Calloc(nprobes*cols,double);

  /* copy the probeset rows, chip-major, on the raw scale */
  for (j = 0; j < cols; j++){
    for (i =0; i < nprobes; i++){
      z[j*nprobes + i] = data[j*rows + cur_rows[i]];
    }
  }

  for (j=0; j < cols; j++){
    results[j] = log_median(&z[j*nprobes],nprobes);
    resultsSE[j] = R_NaReal;
  }
  Free(z);
}

/* As LogMedian, but without touching any SE output. */
void LogMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){

  int i,j;
  double *z = Calloc(nprobes*cols,double);

  for (j = 0; j < cols; j++){
    for (i =0; i < nprobes; i++){
      z[j*nprobes + i] = data[j*rows + cur_rows[i]];
    }
  }

  for (j=0; j < cols; j++){
    results[j] = log_median(&z[j*nprobes],nprobes);
  }
  Free(z);
}

/* Column-wise log2(median) over all rows of the matrix, using a scratch
   copy of each column. */
void logmedian(double *data, int rows, int cols, double *results, double *resultsSE){

  int i,j;
  double *buffer = Calloc(rows, double);

  for (j=0; j < cols; j++){
    for (i = 0; i < rows; i++){
      buffer[i] = data[j*rows + i];
    }
    results[j] = log_median(buffer,rows);
    resultsSE[j] = R_NaReal;
  }

  Free(buffer);
}

/* As logmedian, but operates directly on each column of data (no scratch
   copy). NOTE(review): if median() reorders its input, this variant mutates
   the caller's matrix -- confirm median() semantics in rma_common. */
void logmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){

  int i,j;

  for (j=0; j < cols; j++){
    results[j] = log_median(&data[j*rows],rows);
    resultsSE[j] = R_NaReal;
  }

  // Free(buffer);
}
preprocessCore/src/log_median.h0000644000126300012640000000070212127220006020160 0ustar00biocbuildphs_compbio#ifndef LOG_MEDIAN_H
#define LOG_MEDIAN_H 1

void LogMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
void LogMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
void logmedian(double *data, int rows, int cols, double *results, double *resultsSE);
void logmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);

#endif
preprocessCore/src/matrix_functions.c0000644000126300012640000002546312127220006021464
0ustar00biocbuildphs_compbio/*********************************************************************
**
** file: matrix_functions.c
**
** Aim: This is where some matrix manipulation functions go.
**
** Copyright (C) 2003-2004 Ben Bolstad
**
** created by: B. M. Bolstad
**
** created on: June 22, 2004
**
** History:
** June 22, 2004 - Initial version
** Mar 1, 2006 - change commenting style to ansi C style
** Aug 28, 2006 - change moduleCdynload to R_moduleCdynload
** Sept 26, 2006 - remove R_moduleCdynload. SHould fix windows build problems.
**
********************************************************************/

#include "rlm.h"
#include "rlm_se.h"
#include "psi_fns.h"
#include "matrix_functions.h"

/* NOTE(review): the angle-bracket header names below were lost in extraction
** (rendered as bare "#include"); confirm against the upstream sources. */
#include #include #include #include #include #include #include #include

/********************************************************************
**
** two static global variables to define if lapack library loaded
** and which library to use. 0 is LINPACK, 1 is LAPACK
**
** Kind of bad, but will do it anyway
**
********************************************************************/

static int Lapack_initialized = 0;
static int use_lapack = 1;

/*********************************************************************
**
** static void Lapack_Init(void)
**
** this function loads the Lapack library if it has not already been
** loaded and sets the use_lapack variable to 1 so that LAPACK
** is used (for Choleski and SVD routines)
**
**
**
********************************************************************/

/* Marks LAPACK as initialized/selected. NOTE(review): res is hard-coded to
   1 since the R_moduleCdynload call was removed (see Sept 26, 2006 history
   entry), so the early return below is dead code, and the header comment's
   "static" no longer matches the definition. */
void Lapack_Init(void)
{
    int res = 1;
    /* R_moduleCdynload("lapack", 0, 1); */

    Lapack_initialized = -1;
    if(!res) return;

    /* Initializing LAPACK */
    use_lapack = 1;
    Lapack_initialized = 1;
    return;
}

/********************************************************************
**
** external declarations for Choleski routines (LINPACK)
**
**
*******************************************************************/

extern int dpofa_(double *x, int *lda, int *n, int *j);

extern int dpodi_(double *x,
int *lda, int *n, double *d, int *j); /******************************************************************** ** ** external declarations for Choleski routines (LAPACK) ** ** *******************************************************************/ extern int dpotrf_(const char *uplo, const int *n, double* a, const int *lda, int *info); extern int dpotri_(const char *uplo, const int *n, double* a, const int *lda, int *info); /***************************************************************** * svd routine - LINPACK *****************************************************************/ extern int dsvdc_(double *x, int *ldx, int *n, int *p, double *s, double *e, double *u, int *ldu, double *v, int *ldv, double *work, int *job, int *info); /***************************************************************** * svd routine - LAPACK *****************************************************************/ extern int dgesdd_(const char *jobz, const int *m, const int *n, double *a, const int *lda, double *s, double *u, const int *ldu, double *vt, const int *ldvt, double *work, const int *lwork, int *iwork, int *info); /********************************************************************* ** ** int Choleski_decompose(double *X, double *L, int n) ** ** double *X - a square matrix ** double *L - space already allocated to store Cholesky decomposition ** int n - matrix dimension ** int lapack - if 0 use LINPACK otherwise LAPACK ** ** RETURNS integer code indicating success or failure 0 means no error, ** non zero indicates failure ie not positive definite ** ** this function choleski decomposes a positive definite symmetric matrix, ** on output L will contain the Choleski decomposition in the upper triangle ** ** *********************************************************************/ static int Choleski_decompose(double *X, double *L, int n, int lapack){ int i,j,error_code; char upper = 'U'; for (i=0; i < n; i++){ for (j=0; j < n; j++){ if (i > j) L[j*n+i] = 0.0; else { L[j*n+i] = X[j*n + i]; } } } 
if (!lapack){ dpofa_(L,&n,&n,&error_code); } else { dpotrf_(&upper,&n,L,&n,&error_code); } return error_code; } /*********************************************************************** ** ** int Choleski_2_inverse(double *X, double *Xinv, int n) ** ** double *X - matrix containing choleski decomposition in upper triangle ** double *Xinv - on output will contain the inverse ** int n - dimension of matrix ** int upperonly - if non zero return only the upper triangle of the inverse. ** int lapack - use LINPACK if 0 otherwise LAPACK ** ** RETURNS integer code, indicating success 0 or error (non zero) ** ** this function uses the choleski decomposition of a ** matrix to compute the inverse of a matrix. ** typically it would be used in conjunction with the choleski_decompose ** function above. ** ** **********************************************************************/ static int Choleski_2_inverse(double *X, double *Xinv, int n,int upperonly, int lapack){ int i,j ,error_code=0,inverseonly; double d =0.0; char upper = 'U'; for (i=0; i < n; i++){ /* check for a zero or close to zero diagonal element */ if(fabs(X[i*n+ i]) < 1e-06){ error_code = 1; return error_code; } for (j=i; j < n; j++){ Xinv[j*n + i] = X[j*n + i]; } } inverseonly = 1; if (!lapack){ dpodi_(Xinv,&n,&n,&d,&inverseonly); } else { dpotri_(&upper,&n,Xinv,&n,&error_code); } if (!upperonly){ for (i=0; i < n; i++){ for (j=0; j <= i-1; j++){ Xinv[j*n+i] = Xinv[i*n+j]; } } } return error_code; } /*********************************************************************** ** ** int Choleski_inverse(double *X, double *Xinv, double *work, int n) ** ** double *X - matrix containing choleski decomposition in upper triangle ** double *Xinv - on output will contain the inverse ** double *work - working space n*n dimension ** int n - dimension of matrix ** int upperonly - if non zero return only upper triangle of inverse. 
 **
 ** RETURNS integer code, indicating success 0 or error (non zero)
 **
 ** Computes the inverse of a positive definite symmetric matrix via
 ** Choleski decomposition: factorize into the caller-supplied
 ** workspace, then invert in place.
 **
 **********************************************************************/

int Choleski_inverse(double *X, double *Xinv, double *work, int n, int upperonly){

  int error_code;

  error_code = Choleski_decompose(X, work, n, use_lapack);
  if (!error_code){
    error_code = Choleski_2_inverse(work, Xinv, n, upperonly, use_lapack);
  }
  return error_code;
}

/***************************************************************
 **
 ** int SVD_compute()
 **
 ** Computes the singular value decomposition of the n by n matrix X,
 ** leaving the singular values in s and the U/V factors in u and v.
 ** When lapack != 0 the LAPACK routine dgesdd is used, in which case v
 ** holds the transpose V^T; SVD_2_inverse below accounts for this.
 **
 ***************************************************************/

static int SVD_compute(double *X, int n, double *s, double *u, double *v, int lapack){

  int i, j, error_code;
  int lwork = 7*n*n + 4*n;  /* workspace size for dgesdd with jobz = 'A' */
  int job = 21;             /* dsvdc job code: compute both U and V */
  char jobz = 'A';
  /* the decomposition destroys its input, so work on a copy of X */
  double *Xcopy= Calloc(n*n,double);
  double *e = Calloc(n,double);
  double *work = Calloc(n,double);
  double *work2 = Calloc(lwork,double);
  int *iwork = Calloc(8*n,int);

  for (i=0; i < n; i++){
    for (j=0; j < n; j++){
      Xcopy[j*n + i] = X[j*n+i];
    }
  }

  if (!lapack){
    dsvdc_(Xcopy,&n,&n,&n,s,e,u,&n,v,&n,work,&job,&error_code);
  } else {
    dgesdd_(&jobz,&n,&n,Xcopy,&n,s,u,&n,v,&n,work2,&lwork,iwork,&error_code);
  }

  Free(iwork);
  Free(work2);
  Free(work);
  Free(e);
  Free(Xcopy);

  return error_code;
}

/***************************************************************
 **
 ** int SVD_2_inverse(double *Xinv, int n, double *s, double *u, double *v, int lapack)
 **
 ** double *Xinv - on exit contains the (generalized) inverse
 ** int n - Xinv is n by n
 ** double *s - SVD singular values, length n
 ** double *u - SVD components n by n
 ** double *v - SVD components n by n
 ** int lapack - non zero if the decomposition was done by a LAPACK
 **              routine (implies v is the transpose)
 **
 ** Given a singular value decomposition of a matrix, compute the
 ** generalized inverse: singular values below tolerance*s[0] are
 ** treated as zero.
 **
 ***************************************************************/

static int SVD_2_inverse(double *Xinv, int n, double *s, double *u, double *v,int lapack){
  double tolerance = 1e-7; /* 1.490116e-08; */
  int i,j,k;
  int nonzero =n;

  /* count how many leading singular values are considered non-zero */
  for (i = 0; i < n; i++){
    if (s[i] < tolerance*s[0]){
      nonzero = i;
      /* printf("nonzero %d",i); */
      break;
    }
  }

  /* for all columns where s is not too small, scale u by 1/s:
     computing svdu$v %*% (t(svdu$u) * 1/svdu$d) */
  for (i = 0; i < n; i++){
    for (j = 0; j < nonzero; j++){
      u[j*n + i] = u[j*n+i] * 1.0/s[j];
    }
  }

  if (!lapack){
    for (i = 0; i < n; i++){
      for (j = 0; j < n; j++){
        Xinv[j*n+i] = 0.0;
        for (k=0; k < nonzero; k++){
          Xinv[j*n+i]+= v[k*n+i] * u[k*n+j];
        }
      }
    }
  } else {
    /* lapack so v is transposed */
    for (i = 0; i < n; i++){
      for (j = 0; j < n; j++){
        Xinv[j*n+i] = 0.0;
        for (k=0; k < nonzero; k++){
          Xinv[j*n+i]+= v[i*n+k] * u[k*n+j];
        }
      }
    }
  }
  return 0;
}

/***************************************************************
 **
 ** int SVD_inverse(double *X, double *Xinv, int n)
 **
 ** double *X - the matrix to be inverted
 ** double *Xinv - on exit contains inverse
 ** int n - X and Xinv are n by n
 **
 ** given an n by n matrix compute its inverse by singular value decomposition
 ** this is particularly useful when dealling with singular or near singular
 ** matrices since if the regular inverse can not be computed a generalized
 ** inverse will be returned instead, rather than erroneous results.
 **
 ** Note that we will use linpack routine for SVD at some point this will
 ** be transitioned to a lapack routine. (This has now been done).
** ** ** **************************************************************/ int SVD_inverse(double *X, double *Xinv, int n){ int error_code=0; double *s = Calloc(n+1,double); double *v = Calloc(n*n,double); double *u = Calloc(n*n,double); error_code = SVD_compute(X, n, s, u, v,use_lapack); SVD_2_inverse(Xinv,n, s, u, v,use_lapack); return error_code; Free(s); Free(v); Free(u); } /* void R_SVD_compute(double *X, int *n, double *s, double *u, double *v){ SVD_compute(X, *n,s,u, v); } */ void R_SVD_inverse(double *X, double *Xinv, int *n){ SVD_inverse(X, Xinv,*n); } preprocessCore/src/matrix_functions.h0000644000126300012640000000034112127220006021455 0ustar00biocbuildphs_compbio#ifndef MATRIX_FUNCTIONS_H #define MATRIX_FUNCTIONS_H 1 void Lapack_Init(void); int SVD_inverse(double *X, double *Xinv, int n); int Choleski_inverse(double *X, double *Xinv, double *work, int n, int upperonly); #endif preprocessCore/src/median.c0000644000126300012640000001044412127220006017316 0ustar00biocbuildphs_compbio/************************************************************************ ** ** median.c ** ** created by: B. M. Bolstad ** created on: Feb 6, 2003 (but based on earlier work from median_log.c) ** ** Copyright (C) 2007 Ben Bolstad ** ** last modified: Sep 16, 2007 ** ** License: LGPL V2 (same as the rest of the preprocessCore package) ** ** General discussion ** ** Implement median log2 pm summarization. ** ** Sep 16, 2007 - initial version ** ************************************************************************/ #include "rma_common.h" #include "median.h" #include #include #include #include #include #include #include /*************************************************************************** ** ** double MedianLog(double *x, int length) ** ** double *x - a vector of PM intensities ** int length - length of *x ** ** take the log2 of the median of PM intensities. 
** ***************************************************************************/ static double colmedian_wrapper(double *x, int length){ double med = 0.0; med = median_nocopy(x,length); return (med); } /*************************************************************************** ** ** double MedianLogPM(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute log2 Median expression measure. ** Note that data is a probes by chips matrix. ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. ** ***************************************************************************/ void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; } } for (j=0; j < cols; j++){ results[j] = colmedian_wrapper(&z[j*nprobes],nprobes); resultsSE[j] = R_NaReal; } Free(z); } /*************************************************************************** ** ** double MedianLogPM_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute log2 Median expression measure. ** Note that data is a probes by chips matrix. 
** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. ** ***************************************************************************/ void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; } } for (j=0; j < cols; j++){ results[j] = colmedian_wrapper(&z[j*nprobes],nprobes); } Free(z); } void colmedian(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; double *buffer = Calloc(rows, double); for (j=0; j < cols; j++){ for (i = 0; i < rows; i++){ buffer[i] = data[j*rows + i]; } results[j] = colmedian_wrapper(buffer,rows); resultsSE[j] = R_NaReal; } Free(buffer); } void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; for (j=0; j < cols; j++){ results[j] = colmedian_wrapper(&data[j*rows],rows); resultsSE[j] = R_NaReal; } } preprocessCore/src/median.h0000644000126300012640000000067512127220006017330 0ustar00biocbuildphs_compbio#ifndef MEDIAN_H #define MEDIAN_H 1 void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); void colmedian(double *data, int rows, int cols, double *results, double *resultsSE); void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/src/median_log.c0000644000126300012640000001123312127220006020154 
0ustar00biocbuildphs_compbio/************************************************************************ ** ** median_logPM.c ** ** created by: B. M. Bolstad ** created on: Feb 6, 2003 (but based on earlier work from Nov 2002) ** ** Copyright (C) 2003-2007 Ben Bolstad ** ** last modified: Feb 6, 2003 ** ** License: LGPL V2 (same as the rest of the preprocessCore package) ** ** General discussion ** ** Implement median log2 pm summarization. ** ** Feb 6, 2003 - Initial version of this summarization method ** Feb 24, 2003 - Remove unused variable in i from MedianLog ** Jul 23, 2003 - add SE parameter (but not yet implemented) ** Oct 10, 2003 - added PLM version ** Sept 9, 2007 - branch out of affyPLM into a new package preprocessCore ** ************************************************************************/ #include "rma_common.h" #include "median_log.h" #include #include #include #include #include #include #include /*************************************************************************** ** ** double MedianLog(double *x, int length) ** ** double *x - a vector of PM intensities ** int length - length of *x ** ** take the log2 of the median of PM intensities. ** ***************************************************************************/ static double median_log(double *x, int length){ double med = 0.0; med = median_nocopy(x,length); return (med); } /*************************************************************************** ** ** double MedianLogPM(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute log2 Median expression measure. ** Note that data is a probes by chips matrix. 
** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. ** ***************************************************************************/ void MedianLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = log(data[j*rows + cur_rows[i]])/log(2.0); } } for (j=0; j < cols; j++){ results[j] = median_log(&z[j*nprobes],nprobes); resultsSE[j] = R_NaReal; } Free(z); } /*************************************************************************** ** ** double MedianLogPM_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) ** ** aim: given a data matrix of probe intensities, and a list of rows in the matrix ** corresponding to a single probeset, compute log2 Median expression measure. ** Note that data is a probes by chips matrix. ** ** double *data - Probe intensity matrix ** int rows - number of rows in matrix *data (probes) ** int cols - number of cols in matrix *data (chips) ** int *cur_rows - indicies of rows corresponding to current probeset ** double *results - already allocated location to store expression measures (cols length) ** int nprobes - number of probes in current probeset. 
** ***************************************************************************/ void MedianLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = log(data[j*rows + cur_rows[i]])/log(2.0); } } for (j=0; j < cols; j++){ results[j] = median_log(&z[j*nprobes],nprobes); } Free(z); } void medianlog(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; double *buffer = Calloc(rows, double); for (j=0; j < cols; j++){ for (i = 0; i < rows; i++){ buffer[i] = log(data[j*rows + i])/log(2.0); } results[j] = median_log(buffer,rows); resultsSE[j] = R_NaReal; } Free(buffer); } void medianlog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ int i,j; for (j=0; j < cols; j++){ for (i = 0; i < rows; i++){ data[j*rows + i]= log(data[j*rows + i])/log(2.0); } results[j] = median_log(&data[j*rows],rows); resultsSE[j] = R_NaReal; } } preprocessCore/src/median_log.h0000644000126300012640000000070512127220006020163 0ustar00biocbuildphs_compbio#ifndef MEDIAN_LOG_H #define MEDIAN_LOG_H 1 void MedianLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); void MedianLog_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); void medianlog(double *data, int rows, int cols, double *results, double *resultsSE); void medianlog_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); #endif preprocessCore/src/medianpolish.c0000644000126300012640000002536312127220006020543 0ustar00biocbuildphs_compbio/************************************************************************ ** ** file: medianpolish.c ** ** Copyright (C) 2002-2007 Ben Bolstad ** ** created by: B. M. 
Bolstad ** created on: Jan 7, 2003 (but based on code dating back as far as June 2002) ** ** last modified: Jan 7, 2003 ** ** License: LGPL V2 (same as the rest of the preprocessCore package) ** ** Median polish summary measure (used in the RMA expression measure) ** and just general medianpolish model fitting ** ** ** History ** ** Jan 7, 2003 - Initial version to fit into the three-step framework. ** Jan 13, 2003 - move median() into threestep_common.c ** Feb 24, 2003 - make maxiter get used. ** Jul 23, 2003 - add ability to accept SE parameter ** Sept 13, 2003 - introduced medianpolishPLM which returns ** most of what is required by the fitting ** algorithm ** Oct 05, 2003 - added in summary_param ** Apr 5, 2004 - change malloc/free to Calloc/Free ** Nov 13, 2006 - make median calls to median_nocopy ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation ** May 24, 2007 - break median polish functionality down into even smaller component parts. 
 **
 ************************************************************************/

#include "medianpolish.h"
#include "rma_common.h"

/* NOTE(review): system/R header names were lost in extraction (bare
   #include lines) -- restore them from the upstream sources. */
#include
#include
#include
#include
#include
#include
#include

/*******************************************************************************
 **
 ** double sum_abs(double *z, int rows, int cols)
 **
 ** Returns the sum of the absolute values of the elements of the
 ** rows x cols matrix z.  Used as the convergence criterion of the
 ** median polish iteration below.
 **
 ******************************************************************************/

static double sum_abs(double *z, int rows, int cols){

  int i, j;
  double sum = 0.0;

  for (i=0; i < rows; i++)
    for (j=0; j < cols; j++)
      sum+=fabs(z[j*rows+i]);

  return sum;
}

/********************************************************************************
 **
 ** void get_row_median(double *z, double *rdelta, int rows, int cols)
 **
 ** Stores the median of each row of z in rdelta (length rows).  z is
 ** not modified: each row is copied into a scratch buffer, which
 ** median_nocopy is then free to reorder.
 **
 ********************************************************************************/

static void get_row_median(double *z, double *rdelta, int rows, int cols){
  int i,j;
  double *buffer = (double *)Calloc(cols,double);

  for (i = 0; i < rows; i++){
    for (j = 0; j < cols; j++){
      buffer[j] = z[j*rows + i];
    }
    rdelta[i] = median_nocopy(buffer,cols);
  }

  Free(buffer);
}

/********************************************************************************
 **
 ** void get_col_median(double *z, double *cdelta, int rows, int cols)
 **
 ** Stores the median of each column of z in cdelta (length cols).
 ** z is not modified.
 **
 ********************************************************************************/

static void get_col_median(double *z, double *cdelta, int rows, int cols){

  int i, j;
  double *buffer = (double *)Calloc(rows,double);

  for (j = 0; j < cols; j++){
    for (i = 0; i < rows; i++){
      buffer[i] = z[j*rows + i];
    }
    cdelta[j] = median_nocopy(buffer,rows);
  }

  Free(buffer);
}

/***********************************************************************************
 **
 ** void subtract_by_row(double *z, double *rdelta, int rows, int cols)
 **
 ** Subtracts rdelta[i] from every element of row i of z.
 **
 ***********************************************************************************/

static void subtract_by_row(double *z, double *rdelta, int rows, int cols){

  int i,j;

  for (i = 0; i < rows; i++){
    for (j = 0; j < cols; j++){
      z[j*rows +i]-= rdelta[i];
    }
  }
}

/***********************************************************************************
 **
 ** void subtract_by_col(double *z, double *cdelta, int rows, int cols)
 **
 ** Subtracts cdelta[j] from every element of column j of z.
 **
 ***********************************************************************************/

static void subtract_by_col(double *z, double *cdelta, int rows, int cols){

  int i,j;

  for (j = 0; j < cols; j++){
    for (i = 0; i < rows; i++){
      z[j*rows +i]-= cdelta[j];
    }
  }
}

/***********************************************************************************
 **
 ** void rmod(double *r, double *rdelta, int rows)
 **
 ** Adds rdelta elementwise into the row-effect accumulator r.
 **
 ***********************************************************************************/

static void rmod(double *r, double *rdelta, int rows){
  int i;

  for (i = 0; i < rows; i++){
    r[i]= r[i] + rdelta[i];
  }
}

/***********************************************************************************
 **
 ** void cmod(double *c, double *cdelta, int cols)
 **
 ** Adds cdelta elementwise into the column-effect accumulator c.
 **
 ***********************************************************************************/

static void cmod(double *c, double *cdelta, int cols){
  int j;

  for (j = 0; j < cols; j++){
    c[j]= c[j] + cdelta[j];
  }
}

/***********************************************************************************
 **
 ** void median_polish_fit_no_copy(double *data, int rows, int cols,
 **                                double *r, double *c, double *t)
 **
 ** Runs Tukey's median polish in place on the rows x cols matrix `data`
 ** (which is overwritten with the residuals).  On exit r holds the row
 ** effects, c the column effects and *t the overall effect.  Iterates
 ** at most maxiter (10) times, or until the total absolute residual
 ** changes by less than eps (1%) relative to the previous sweep.
 **
 ***********************************************************************************/

void median_polish_fit_no_copy(double *data, int rows, int cols, double *r, double *c, double *t){

  int i,j,iter;
  int maxiter = 10;
  double eps=0.01;    /* relative convergence tolerance */
  double oldsum = 0.0,newsum = 0.0;
  double delta;
  double *rdelta = Calloc(rows,double);
  double *cdelta = Calloc(cols,double);

  double *z = data; /* This is just to keep consistent with other code here.
                       No actual copying of the data is done here */
  *t = 0.0;

  for (iter = 1; iter <= maxiter; iter++){
    /* row sweep: remove row medians, fold into r, recentre c into *t */
    get_row_median(z,rdelta,rows,cols);
    subtract_by_row(z,rdelta,rows,cols);
    rmod(r,rdelta,rows);
    delta = median(c,cols);
    for (j = 0; j < cols; j++){
      c[j] = c[j] - delta;
    }
    *t = *t + delta;
    /* column sweep: remove column medians, fold into c, recentre r into *t */
    get_col_median(z,cdelta,rows,cols);
    subtract_by_col(z,cdelta,rows,cols);
    cmod(c,cdelta,cols);
    delta = median(r,rows);
    for (i =0; i < rows; i ++){
      r[i] = r[i] - delta;
    }
    *t = *t+delta;
    /* stop once the residual mass has (nearly) stabilised */
    newsum = sum_abs(z,rows,cols);
    if (newsum == 0.0 || fabs(1.0 - oldsum/newsum) < eps)
      break;
    oldsum = newsum;
  }

  Free(rdelta);
  Free(cdelta);
}

/* Median polish summarization computed in place (data is overwritten
   with residuals).  results[j] = overall effect + column effect j;
   resultsSE is filled with NA (median polish provides no SE). */
void median_polish_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){

  int j;

  double *r = Calloc(rows,double);
  double *c = Calloc(cols,double);
  double t;

  double *z = data; /* This is just to keep consistent with other code here.
                       No actual copying of the data is done here */

  median_polish_fit_no_copy(z, rows, cols, r, c, &t);

  for (j=0; j < cols; j++){
    results[j] = t + c[j];
    resultsSE[j] = R_NaReal;
  }

  Free(r);
  Free(c);
}

/* As median_polish_no_copy() but data is first log2-transformed in place. */
void median_polish_log2_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){

  int i, j;

  for (j = 0; j < cols; j++){
    for (i =0; i < rows; i++){
      data[j*rows + i] = log(data[j*rows + i])/log(2.0);
    }
  }

  median_polish_no_copy(data,rows,cols,results,resultsSE);
}

/* Log2-scale median polish; `data` is untouched, `residuals` receives
   the log2 data and then (after fitting) the residuals. */
void median_polish_log2(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals){

  int i, j;

  for (j = 0; j < cols; j++){
    for (i =0; i < rows; i++){
      residuals[j*rows + i] = log(data[j*rows + i])/log(2.0);
    }
  }

  median_polish_no_copy(residuals,rows,cols,results,resultsSE);
}

/* Raw-scale median polish; `data` is untouched, `residuals` receives a
   copy of the data and then (after fitting) the residuals. */
void median_polish(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals){

  int i, j;

  for (j = 0; j < cols; j++){
    for (i =0; i < rows; i++){
      residuals[j*rows + i] = data[j*rows + i];
    }
  }

  median_polish_no_copy(residuals,rows,cols,results,resultsSE);
}

/*************************************************************************************
 **
 ** void MedianPolish(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
 **
 ** a function to do median polish expression summary for one probeset.
 **
 *************************************************************************************/

/*!
\brief Compute medianpolish * * * Given a data matrix of probe intensities, and a list of rows in the matrix * corresponding to a single probeset, compute median polish expression measure. * Note that data is a probes by chips matrix. Also compute SE estimates * * @param data a matrix containing data stored column-wise stored in rows*cols length of memory * @param rows the number of rows in the matrix * @param cols the number of columns in the matrix * @param cur_rows a vector containing row indices to use * @param results pre-allocated space to store output log2 averages. Should be of length cols * @param nprobes number of probes in current set * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols. Note that this is just NA values * * */ void MedianPolish(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = log(data[j*rows + cur_rows[i]])/log(2.0); } } median_polish_no_copy(z,nprobes,cols,results,resultsSE); Free(z); } void MedianPolish_no_log(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ int i,j; double *z = Calloc(nprobes*cols,double); for (j = 0; j < cols; j++){ for (i =0; i < nprobes; i++){ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; } } median_polish_no_copy(z,nprobes,cols,results,resultsSE); Free(z); } preprocessCore/src/medianpolish.h0000644000126300012640000000146612127220006020546 0ustar00biocbuildphs_compbio#ifndef MEDIANPOLISH_H #define MEDIANPOLISH_H 1 void median_polish_fit_no_copy(double *data, int rows, int cols, double *r, double *c, double *t); void median_polish_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); void median_polish_log2_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); void median_polish_log2(double *data, int 
rows, int cols, double *results, double *resultsSE, double *residuals); void median_polish(double *data, int rows, int cols, double *results, double *resultsSE, double *residuals); void MedianPolish(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); void MedianPolish_no_log(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); #endif preprocessCore/src/plmd.c0000644000126300012640000005503312127220006017020 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: plmd.c ** ** Aim: implement PLM-d. A variation of the PLM methodology ** ** Copyright (C) 2007-2008 Ben Bolstad ** ** created by: B. M. Bolstad ** ** created on: Dec 3, 2007 ** ** ** History: ** Dec 3, 2007 - Initial version. ** Jan 21, 2008 - improve detect_split_probe ** Jan 23-24, 2008 - improve design matrix code ** ** ** ** ** *********************************************************************/ /********************************************************************* ** ** Background: Suppose that arrays can be grouped together by some ** sort of blocking variable with levels m=1,...,M ** ** standard PLM: y_ij = beta_j + alpha_i + epsilon_ij ** ** where beta_j are the expression values and ** alpha_i are the probe effects (constrained to sum to zero to make the model identifiable ** ** Modfied PLM: (discussed in section 2.4.6 of the affyPLM vignette) ** ** y_ij = beta_j + alpha_im + e_ij ** ** which has a separate set of probe effects estimated for each block. ** (in the affyPLM setup for each m, sum_i=1,..,I alpha_im =0) ** ** PLM-d tries to be a hybrid approach between these two. In particular ** fit unified probe effects across blocks except in situations where there ** seems to be evidence that the probe effect differs across blocks. ** ** In other words, fit the model ** ** y_ij = beta_j + alpha_im + e_ij ** ** with alpha_i = alpha_i1 = alpha_i2 = ... 
= alpha_im (except where there is evidence to the contrary) ** and sum_{i,m} alpha_im = 0 ** ** For instance suppose that the we have 6 arrays, with the first 3 in m=1 and the last 3 in m=2 ** ** Furthermore suppose that we have a probeset with 8 probes. ** ** Let's assume that the second and the eighth probes are known to behave differently between the blocks ** (later we discuss how this will be decided in the PLM-d procedure) ** ** then the model we would fit would require us to estimate ** ** beta_1, ..., beta_6 (the chip effects/aka expression level) ** ** alpha_1, ** alpha_21, alpha_22, ** alpha_3 ** alpha_4 ** alpha_5 ** alpha_6 ** alpha_7 ** alpha_81, alpha_82 ** ** In R we would generate the model matrix like this ** ** ** chips <- as.factor(rep(c(1,2,3,4,5,6),c(8,8,8,8,8,8))) ** probes <- rep(c(1,2,3,4,5,6,7,8),6) ** ** probes[24 + c(2,10,18)] <- 9 ** probes[24 + c(8,16,24)] <- 10 ** probes <- as.factor(probes) ** ** let X_beta = model.matrix(~ -1 + chips) ** ** So X_beta is ** ** 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 ** let X_alpha = model.matrix(~ -1 + probes)%*%contr.sum(10) ** ** So X alpha is ** 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 
0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 ** ** ** ** X = [X_beta X_alpha] ** ** Let B = [beta_1 beta_2 beta_3 beta_4 beta_5 beta_6 alpha_1, alpha_21, alpha_22, alpha_3, alpha_4, alpha_5, alpha_6, alpha_7, alpha_81]' ** ** So the model can be expressed as ** ** Y = XB + E ** ** The PLM-d procedure is as follows ** ** 1) use the ordinary PLM procedure (fitting the model y_ij = beta_j + alpha_i + epsilonij to get initial estimates ** 2) Examine the residuals, find probes where the residuals appear to be related to blocking variable ** using a (robustified) one-way anova. Choose the probe which has the most association with blocking variable if there is one. Else stop. ** 3) Fit new model (using robust regression) where we include separate block specific probe effects for chosen variable ** 4) Examine residuals for newly fitted model. Look at relationship between residuals and probes which have not been ** divided between block groups. Using (robustified) one-way anova choose probe which has the most significant associtiaton with blocking variable if there ** is one. If probe chosen return to 3. otherwise stop. 
** ** ** ** *********************************************************************/ #include "psi_fns.h" #include "matrix_functions.h" #include "rlm.h" #include "rlm_se.h" #include "rma_common.h" #include "plmd.h" #include #include #include #include #include #include #include #include /********************************************************************* ** ** static double rho_huber(double u, double k) ** ** double u - scaled observation ** double k - parameter ** ** Computes huber rho function. Note that by definition here will need a factor of ** 2 to multiply result in plmd_split_test. However, definition here ** is consistent with the psi_huber in psi_fns.c ** ********************************************************************/ static double rho_huber(double u, double k){ if (fabs(u) <= k){ return u*u/2.0; } else { return k*(fabs(u) - k/2.0); } } /********************************************************************* ** ** static double plmd_split_test(double *values, int length, int ngroups, int *grouplabels) ** ** double *values - residual values (scaled so scale = 1.0) ** int length - length of values (and also grouplabels) ** int ngroups - number of different groups into which values are divided into ** int *grouplabels - labels for each value in values indicating which group observation ** belongs in. Should be a value 0,....,ngroups-1 ** ** A robust LLRT (log likelihood ratio test) is used to determine if there ** is evidence that the mean differs between groups. 
** ** This function computes a test statistic that is approximately ** chisq with ngroups-1 df under the null hypothesis ** ********************************************************************/ static double plmd_split_test(double *values, int length, int ngroups, int *grouplabels){ /* Model 1: NULL model (ie mean model) */ /* Model 2: Basically a robust 1-way ANOVA */ double *X_1, *X_2; /* design models */ double *resid_1, *resid_2; double *weights_1, *weights_2; double *beta_1, *beta_2; double scale_1, scale_2; double T1, T2, TL; double Xi, ave_deriv_psi, ave_psi_sq; int i; /* Allocating space */ X_1 = Calloc(length, double); X_2 = Calloc(ngroups*length, double); resid_1 = Calloc(length, double); resid_2 = Calloc(length, double); weights_1 = Calloc(length, double); weights_2 = Calloc(length, double); beta_1 = Calloc(1,double); beta_2 = Calloc(ngroups,double); /* initializing design matrices */ for (i = 0; i < length; i++){ X_2[length*grouplabels[i] + i] = 1.0; X_1[i] = 1.0; } /* Fitting the models */ rlm_fit(X_1, values, length, 1, beta_1, resid_1, weights_1, psi_huber, 1.345, 20, 0); rlm_fit(X_2, values, length, ngroups, beta_2, resid_2, weights_2, psi_huber, 1.345, 20, 0); scale_1 = 1.0; //med_abs(resid_1, length)/0.6745; scale_2 = med_abs(resid_2, length)/0.6745; T1 = 0.0; T2 = 0.0; TL = 0.0; ave_deriv_psi = 0.0; ave_psi_sq = 0.0; for (i = 0; i < length; i++){ T1+= rho_huber(resid_1[i]/scale_1, 1.345); T2+= rho_huber(resid_2[i]/scale_1, 1.345); ave_deriv_psi += psi_huber(resid_2[i]/scale_1, 1.345, 1); ave_psi_sq += psi_huber(resid_2[i]/scale_2, 1.345, 2)*psi_huber(resid_2[i]/scale_2, 1.345, 2); } ave_deriv_psi/=(double)length; ave_psi_sq/=(double)length; Xi = ave_deriv_psi/ave_psi_sq; TL = T1 - T2; /* check that we are not negative due to say numeric imprecession etc) */ if (TL < 0.0){ TL = 0.0; } /* Rprintf("%f %f %f %f\n", T1, T2, ave_deriv_psi, ave_psi_sq); */ /* De-allocate space */ Free(X_1); Free(X_2); Free(resid_1); Free(resid_2); Free(weights_1); 
Free(weights_2); Free(beta_1); Free(beta_2); return 2*Xi*TL; } void R_split_test(double *values, int *length, int *ngroups, int *grouplabels, double *result){ *result = plmd_split_test(values, *length, *ngroups, grouplabels); } /********************************************************************* ** ** static int plmd_detect_split_probe(double *residuals, int y_rows, int y_cols, int ngroups, int *grouplabels, int *was_split) ** ** double *residuals - a matrix of residuals from a robust PLM (or PLM-d) fit. ** int y_rows - number of rows in matrix (corresponds to number of probes) ** int y_cols - number of cols in matrix (corresponds to number of arrays) ** int ngroups - number of groups into which the arrays may be divided ** int *grouplabels - a label for each column assigning it to a group. Values should be in 0, ..., ngroups -1 ** int *was_split - was a probe previously split (if so then do not need to split it again). vector of ** 0, 1 values of length y_rows ** ** This function determines which (if any) of the currently unsplit probes ** should be split based on computing the LLRT for each probe. ** ** returns 0, ..., y_rows -1 if a probe is chosen for splitting ** returns -1 if no suitable probe is found. 
** ** ********************************************************************/ static int plmd_detect_split_probe(double *residuals, int y_rows, int y_cols, int ngroups, int *grouplabels, int *was_split){ int i,j; double *split_statistic = Calloc(y_rows,double); double *cur_row = Calloc(y_cols, double); double chisq_q; double max_split_statistic; double which_max; double scale; scale = med_abs(residuals, y_rows*y_cols)/0.6745; /* Check all unsplit probes */ for (i = 0; i < y_rows; i++){ if (!was_split[i]){ for (j =0; j < y_cols; j++){ cur_row[j] = residuals[j*y_rows + i]/scale; } split_statistic[i] = plmd_split_test(cur_row, y_cols, ngroups, grouplabels); } else { split_statistic[i] = 0.0; } } /* find probe with maximum split_statistic */ which_max = -1; max_split_statistic = 0.0; for (i= 0; i < y_rows; i++){ if (max_split_statistic < split_statistic[i]){ which_max = i; max_split_statistic = split_statistic[i]; } } /* Check to see if we have a significant split_statistic */ /* test at 0.1% significance level */ if (which_max > -1){ chisq_q = qchisq(0.999,ngroups-1,1,0); if (chisq_q > max_split_statistic){ which_max = -1; } } Free(cur_row); Free(split_statistic); return which_max; } /********************************************************************* ** ** double *plmd_get_design_matrix(int y_rows, int y_cols, int ngroups, int *grouplabels,int *was_split,int *X_rows,int *X_cols) ** ** int y_rows - number of probes ** int y_cols - number of arrays ** int ngroups - number of groups ** int *grouplabels - a label in 0, ..., ngroups-1 for each array (length y_cols) ** int *was_split - a vector of 0,1 values length y_rows indicating whether or not a given probe was split ** int *X_rows - on return contains the number of rows in the design matrix ** int *X_cols - on return contains the number of columns in the design matrix ** ** returns the design matrix for the model described by its parameters ** (see the description above for how the design matrix should look) ** Note 
that the calling function will be responsible for deallocating ** the memory allocated for the design matrix ** ********************************************************************/ double *plmd_get_design_matrix(int y_rows, int y_cols, int ngroups, int *grouplabels,int *was_split,int *X_rows,int *X_cols){ double *X; int i,j; int probe; int num_splits = 0; int cur_group; int cur_col; int col; /* count the number of probes that have been split */ for (i=0; i < y_rows; i++){ num_splits +=was_split[i]; } /* Number of columns in design_matrix y_cols - number of chips y_rows - 1 + (ngroups -1)*num_splits */ *X_rows = (y_rows*y_cols); *X_cols = (y_cols + y_rows - 1 + (ngroups -1)*num_splits); X = Calloc((y_rows*y_cols)*(y_cols + y_rows - 1 + (ngroups -1)*num_splits),double); /* Setting the X_beta part of the matrix */ for (j = 0; j < y_cols; j++){ for (i = j*y_rows; i < (j+1)*y_rows; i++){ X[j*(y_rows*y_cols) + i] = 1.0; } } /* Now the X_alpha part of the matrix */ cur_col = y_cols; for (probe = 0; probe < y_rows-1; probe++){ if (was_split[probe]){ /*Split probe so need ngroups columns set up */ for (j = 0; j < y_cols; j++){ cur_group = grouplabels[j]; i = j*y_rows + probe; X[(cur_col + cur_group)*(y_rows*y_cols) + i] = 1.0; } cur_col+= ngroups; } else { /* just a single column */ for (i=probe; i < y_cols*y_rows; i=i+y_rows){ X[cur_col*(y_rows*y_cols) + i] = 1.0; } cur_col++; } } /* Last probe */ if (was_split[probe]){ for (j = 0; j < y_cols; j++){ cur_group = grouplabels[j]; if (cur_group == ngroups -1){ i = j*y_rows + probe; for (col = y_cols; col < y_cols + y_rows - 1 + (ngroups -1)*num_splits; col++){ X[col*(y_rows*y_cols) + i] = -1.0; } } else { i = j*y_rows + probe; X[(cur_col + cur_group)*(y_rows*y_cols) + i] = 1.0; } } } else { for (col = y_cols; col < y_cols + y_rows - 1 + (ngroups -1)*num_splits; col++){ for (i=probe; i < y_cols*y_rows; i=i+y_rows){ X[col*(y_rows*y_cols) + i] = -1.0; } } } return X; } void R_test_get_design_matrix(int *yrows, int *ycols){ int 
i, j; int ngroups = 1; int *grouplabels = Calloc(*ycols, int); int *was_split = Calloc(*yrows, int); int num_splits; int y_rows = *yrows; int y_cols = *ycols; double *X; int X_rows; int X_cols; X = plmd_get_design_matrix(y_rows, y_cols, ngroups, grouplabels,was_split, &X_rows, &X_cols); for (i=0; i < y_rows*y_cols; i++){ for (j = 0; j < y_rows + y_cols -1; j++){ Rprintf("%2.2f ",X[j*(y_rows*y_cols) + i]); } Rprintf("\n"); } Free(X); Rprintf("\n"); ngroups = 2; for (j = 0; j < y_cols/2; j++){ grouplabels[j] = 1; } num_splits = 1; was_split[0] =1; X = plmd_get_design_matrix(y_rows, y_cols, ngroups, grouplabels,was_split, &X_rows, &X_cols); for (i=0; i < y_rows*y_cols; i++){ for (j = 0; j < y_rows + y_cols -1 + (ngroups -1)*num_splits; j++){ Rprintf("%2.2f ",X[j*(y_rows*y_cols) + i]); } Rprintf("\n"); } Free(X); Rprintf("\n"); ngroups = 2; for (j = 0; j < y_cols/2; j++){ grouplabels[j] = 1; } num_splits = 2; was_split[0] =1; was_split[y_rows-1] =1; X = plmd_get_design_matrix(y_rows, y_cols, ngroups, grouplabels,was_split, &X_rows, &X_cols); for (i=0; i < y_rows*y_cols; i++){ for (j = 0; j < y_rows + y_cols -1 + (ngroups -1)*num_splits; j++){ Rprintf("%2.2f ",X[j*(y_rows*y_cols) + i]); } Rprintf("\n"); } Free(grouplabels); } /********************************************************************* ** ** void plmd_fit(double *y, int y_rows, int y_cols, int ngroups, int *grouplabels, int *was_split, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k,int max_iter) ** ** double *y - matrix of observations (probes in rows, arrays in columns) ** int y_rows - number of probes ** int y_cols - number of arrays ** int ngroups - number of groups into whicharrays may be divided ** int *grouplabels - assign each array to a group. values should be in 0, ... ngroups -1. 
** length of grouplabels is y_cols ** int *was_split - on output 1 indicates separate probe effects fit for each group of arrays ** 0 indicates a unified probe effect fit across all arrays ** double *out_beta - on output contains parameter estimates. Note that this space should be of ** length y_cols + y_rows*ngroups -1, but not all the space may be used ** the first y_cols values are the chip effect/expression summary values. ** the remaining values are the probe effects. With these exact break down ** of how these are assigned to probes requires the information in "was_split" ** double *out_resids - on output contains fitted residuals (should be of size y_rows*y_cols ** double *out_weights - on output contains weights used for each observation (should be of size y_rows*y_cols) ** double (* PsiFn)(double, double, int) - psi function for M-estimation ** double psi_k - parameter for PsiFn ** int max_iter - maximum number of iterations in any iteratively reweighted least squares procedure ** ** Fits PLM-d model (Probe Level Model - Dynamic). The idea is to determine dynamically which probes seem to ** systematically vary with respect to the grouplabel variable. When such a probe is found, a model is fit which ** incorporates different probe effects for each group. Otherwise a unified probe effect is fit across all arrays. ** ** In the case that no such probes are found the fitted model is a standard PLM fit. 
** ********************************************************************/ void plmd_fit(double *y, int y_rows, int y_cols, int ngroups, int *grouplabels, int *was_split, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k,int max_iter){ int initialized = 0; int split_probe = -1; double *X; int X_rows; int X_cols; /* Initially nothing is split */ memset(was_split, 0 , y_rows*sizeof(int)); /* Start out with standard PLM fit */ rlm_fit_anova(y, y_rows, y_cols, out_beta, out_resids, out_weights, PsiFn, psi_k, max_iter, initialized); /* Figure out which if any probes to split by group label */ /* Choose the most significant of these and fit new model */ /* repeat until no more splits */ do { split_probe = plmd_detect_split_probe(out_resids, y_rows, y_cols, ngroups, grouplabels, was_split); /* Rprintf("Splitting %d\n",split_probe); */ if (split_probe != -1){ was_split[split_probe] = 1; X = plmd_get_design_matrix(y_rows, y_cols, ngroups, grouplabels, was_split,&X_rows, &X_cols); rlm_fit(X,y, X_rows, X_cols, out_beta, out_resids, out_weights, PsiFn, psi_k, max_iter, initialized); Free(X); } } while (split_probe != -1); } void plmd_fit_R(double *y, int *rows, int *cols, int *ngroups, int *grouplabels, double *out_beta, double *out_resids, double *out_weights){ int *was_split = Calloc(*rows,int); plmd_fit(y, *rows, *cols, *ngroups, grouplabels, was_split, out_beta, out_resids,out_weights, psi_huber,1.345, 20); Free(was_split); } preprocessCore/src/plmd.h0000644000126300012640000000063512127220006017023 0ustar00biocbuildphs_compbio#ifndef PLMD_H #define PLMD_H void plmd_fit(double *y, int y_rows, int y_cols, int ngroups, int *grouplabels, int *was_split, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k,int max_iter); double *plmd_get_design_matrix(int y_rows, int y_cols, int ngroups, int *grouplabels,int *was_split,int *X_rows,int *X_cols); #endif 
preprocessCore/src/plmr.c0000644000126300012640000006332712127220006017043 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: plmr.c ** ** Aim: implement robust linear models specialized to samples + probes model.using ** the PLM-r technique ** ** Copyright (C) 2007-2008 Ben Bolstad ** ** created by: B. M. Bolstad ** ** created on: Nov 22, 2007 ** ** ** History: ** Nov 22, 2007 - Initial version. (Based on rlm_anova.c which dates back several years and some notes about PLMR was to be implemented made about 18 months ago, actually early Sept 2006, which in turn was based about ideas in Bolstad (2004) Dissertation, UCB) ** Feb 14, 2008 - Add PLM-rr and PLM-rc (only row or column robustified but not both) ** ** ** ** *********************************************************************/ #include "psi_fns.h" #include "matrix_functions.h" #include "rlm.h" #include "rlm_se.h" #include "rma_common.h" #include "plmr.h" #include #include #include #include #include #include #include #include static void XTWY(int y_rows, int y_cols, double *wts,double *y, double *xtwy){ int i,j; /* sweep columns (ie chip effects) */ for (j=0; j < y_cols; j++){ xtwy[j] = 0.0; for (i=0; i < y_rows; i++){ xtwy[j] += wts[j*y_rows + i]* y[j*y_rows + i]; } } /* sweep rows (ie probe effects) */ for (i=0; i < y_rows; i++){ xtwy[i+y_cols] = 0.0; for (j=0; j < y_cols; j++){ xtwy[i+y_cols] += wts[j*y_rows + i]* y[j*y_rows + i]; } } for (i=0; i < y_rows-1; i++){ xtwy[i+y_cols] = xtwy[i+y_cols] - xtwy[y_cols+y_rows-1]; } } /********************************************************************************** ** ** This is for testing the XTWY() function from R using .C() ** *********************************************************************************/ static void XTWY_R(int *rows, int *cols, double *out_weights, double *y,double *xtwy){ XTWY(*rows, *cols, out_weights,y,xtwy); } /*************** This is R testing code for my own purposes 
library(AffyExtensions)
data(Dilution)

y <- pm(Dilution)[1:16,]

.C("XTWY_R",as.integer(16),as.integer(4),as.double(rep(1,64)),as.double(as.vector(log2(y))),double(100))

probes <- rep(1:16,4)
samples <- rep(1:4,c(rep(16,4)))

X <- model.matrix(~-1 + as.factor(samples) + C(as.factor(probes),"contr.sum"))

t(X)%*%as.vector(log2(y))

****************/

/* Accumulates X^T W X for the samples + probes design without forming X.
** xtwx is a (y_cols + y_rows - 1) square matrix stored column-major.
** Block structure: upper-left is diagonal (chip effects); lower-right is
** the probe-effect block (dense because of the sum-to-zero constraint on
** the last probe); the off-diagonal blocks couple the two. */
static void XTWX(int y_rows, int y_cols, double *wts, double *xtwx){

  int Msize = y_cols +y_rows-1;
  int i,j,k;

  /* diagonal elements of first part of matrix ie upper partition */
  for (j =0; j < y_cols;j++){
    for (i=0; i < y_rows; i++){
      xtwx[j*Msize + j]+=wts[j*y_rows + i];
    }
  }

  /* diagonal portion of lower partition matrix: diagonal elements*/
  for (j =0; j < y_cols;j++){
    for (i = 0; i < y_rows-1;i++){
      xtwx[(y_cols +i)*Msize + (y_cols +i)]+= wts[j*y_rows + i];
    }
  }

  /* diagonal portion of lower partition matrix: off diagonal elements*/
  /* the last probe's weight enters everywhere via the contr.sum coding */
  for (j =0; j < y_cols;j++){
    for (i = 0; i < y_rows-1;i++){
      for (k=i ; k < y_rows-1;k++){
        xtwx[(y_cols +k)*Msize + (y_cols +i)] = xtwx[(y_cols +i)*Msize + (y_cols +k)]+= wts[j*y_rows + (y_rows-1)];
      }
    }
  }

  /* the two other portions of the matrix */
  for (j =0; j < y_cols;j++){
    for (i= 0; i < y_rows-1;i++){
      xtwx[j*Msize + (y_cols + i)] = xtwx[(y_cols + i)*Msize + j] = wts[j*y_rows + i] - wts[j*y_rows + (y_rows-1)];
    }
  }

}

/**********************************************************************************
**
** This is for testing the XTWX from R using .C()
**
*********************************************************************************/

static void XTWX_R(int *rows, int *cols, double *out_weights, double *xtwx){

  XTWX(*rows, *cols, out_weights,xtwx);

}

/*************** This is R test code

.C("XTWX_R",as.integer(16),as.integer(4),rep(1,64))

*************/

/* Inverts X^T W X in place, exploiting the partitioned structure:
** with XTWX = [P Q; R S] (P diagonal), the Schur complement
** S - R P^-1 Q is inverted by Choleski_inverse and the remaining blocks
** of the inverse are reconstructed from it.
** Note the routine is not as numerically stable as a general solver. */
static void XTWXinv(int y_rows, int y_cols,double *xtwx){

  int i,j,k;
  int Msize = y_cols +y_rows-1;

  double *P= Calloc(y_cols,double);
  double *RP = Calloc(y_cols*(y_rows-1),double);
  double *RPQ = Calloc((y_rows-1)*(y_rows-1),double);
  double *S = Calloc((y_rows-1)*(y_rows-1),double);
  double *work = Calloc((y_rows-1)*(y_rows-1),double);

  /* RP = R * P^-1 (P is diagonal, so this is a row scaling) */
  for (j=0;j < y_cols;j++){
    for (i=0; i < y_rows -1; i++){
      RP[j*(y_rows-1) + i] = xtwx[j*Msize + (y_cols + i)]*(1.0/xtwx[j*Msize+j]);
    }
  }

  /* RPQ = R * P^-1 * Q (symmetric) */
  for (i=0; i < y_rows -1; i++){
    for (j=i;j < y_rows -1; j++){
      for (k=0; k < y_cols;k++){
        RPQ[j*(y_rows-1) + i] += RP[k*(y_rows-1) + j]*xtwx[k*Msize + (y_cols + i)];
      }
      RPQ[i*(y_rows-1) + j] = RPQ[j*(y_rows-1) + i];
    }
  }

  /* RPQ := S - R P^-1 Q (the Schur complement) */
  for (j=0; j < y_rows-1;j++){
    for (i=j; i < y_rows-1;i++){
      RPQ[i*(y_rows-1) + j] = RPQ[j*(y_rows-1)+i] = xtwx[(y_cols + j)*Msize + (y_cols + i)] - RPQ[j*(y_rows-1) + i];
    }
  }

  /*for (i =0; i< y_rows-1; i++){
    for (j=0; j < y_cols; j++){
      printf("%4.4f ",RP[j*(y_rows-1) + i]);
    }
    printf("\n");
  }

  for (j=0;j < y_rows -1; j++){
    for (i=0; i < y_rows -1; i++){
      printf("%4.4f ",RPQ[j*(y_rows-1) + i]);
    }
    printf("\n");
  }

  for (i=0; i < y_rows -1; i++){
    for (j=0;j < y_rows -1; j++){
      printf("%4.4f ",S[j*(y_rows-1) + i]);
    }
    printf("\n");
  } */

  /* Lets start making the inverse */

  /* S holds (S - R P^-1 Q)^-1, the lower-right block of the inverse */
  Choleski_inverse(RPQ, S, work, y_rows-1, 0);

  /* off-diagonal blocks of the inverse: -(Schur^-1) * R P^-1 */
  for (j=0; j< y_cols;j++){
    for (i=0; i < y_rows -1; i++){
      xtwx[j*Msize + (y_cols + i)] = 0.0;
      for (k=0; k < y_rows -1; k++){
        xtwx[j*Msize + (y_cols + i)]+= -1.0*(S[i*(y_rows-1) + k])*RP[j*(y_rows-1) + k];
      }
      xtwx[(y_cols + i)*Msize + j]=xtwx[j*Msize + (y_cols + i)];
    }
  }

  for (j=0;j < y_cols;j++){
    P[j] = 1.0/xtwx[j*Msize+j];
  }

  /* upper-left block: P^-1 + P^-1 Q (Schur^-1) R P^-1 */
  for (j=0; j < y_cols; j++){
    for (i=j; i < y_cols;i++){
      xtwx[i*Msize + j]=0.0;
      for (k=0;k < y_rows-1; k++){
        xtwx[i*Msize + j]+= RP[i*(y_rows-1) + k]*xtwx[j*Msize + (y_cols + k)];
      }
      xtwx[i*Msize + j]*=-1.0;
      xtwx[j*Msize + i] = xtwx[i*Msize + j];
    }
    xtwx[j*Msize + j]+=P[j];
  }

  /* lower-right block of the inverse */
  for (j=0; j < y_rows-1;j++){
    for (i=0; i < y_rows-1;i++){
      xtwx[(y_cols + j)*Msize + (y_cols + i)] = S[j*(y_rows-1)+i];
    }
  }

  Free(P);
  Free(work);
  Free(RP);
  Free(RPQ);
  Free(S);

}

/**********************************************************************************
**
** This is for testing the XTWXinv from R
**
*********************************************************************************/ static void XTWX_R_inv(int *rows, int *cols, double *xtwx){ XTWXinv(*rows, *cols, xtwx); } /*************** This is R testing code for my own purposes library(AffyExtensions) probes <- rep(1:16,4) samples <- rep(1:4,c(rep(16,4))) X <- model.matrix(~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum")) W <- diag(seq(0.05,1,length=64)) solve(t(X)%*%W%*%X) - matrix(.C("XTWX_R_inv",as.integer(16),as.integer(4),as.double(t(X)%*%W%*%X))[[3]],19,19) matrix(.C("XTWX_R_inv",as.integer(16),as.integer(4),as.double(t(X)%*%W%*%X))[[3]],19,19)[1:4,5:19] XTWX <- t(X)%*%W%*%X R <- XTWX[5:19,1:4] P <- XTWX[1:4,1:4] Q <- t(R) S <- XTWX [5:19,5:19] R%*%solve(P) probes <- rep(1:16,100) samples <- rep(1:100,c(rep(16,100))) X <- model.matrix(~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum")) W <- diag(seq(0.05,1,length=1600)) system.time(matrix(.C("XTWX_R_inv",as.integer(16),as.integer(100),as.double(t(X)%*%W%*%X))[[3]],115,115)) *************/ /**************************************************************************************** **************************************************************************************** ** ** The following functions are the core part the PLM-r ** ** these add an additional level of robustness to the standard PLM approach. 
** **************************************************************************************** ****************************************************************************************/ /* the following assumptions are used here - assume that the residuals have normal distribution (really we are just hoping that this is approximately true) - the residuals are all independent - standardizing and squaring the residuals results in chi-sq (v=df=1) random variables - we define the median as the (n+1)/2 element if n is odd and the n/2th element if n is even (ie no averaging of the middle elements) when there are less than 30 elements - when n is greater than 30 we use a normal distribution approximation the CDF for chi-square distribution is F(x) = gam(v/2,x/2)/GAMMA(v/2) where gam() is the lower incomplete Gamma function and GAMMA() is the Gamma funciton GAMMA(1/2) = sqrt(PI) gamma(1/2,x) = sqrt(PI)*erf(sqrt(x)) the PDF for chi-square distribution is f(x) = 1/(2^(v/2) * GAMMA(v/2)) * x^(v/2 -1) *exp(-x/2) for x > 0 pnorm(x) = 1/2*(1+ erf(x/sqrt(2))) so erf(x) = 2*pnorm(x*sqrt(2)) - 1 The CDF for the k'th order statistic is given by G(y) = sum_{j=k}^{n} nCj [F(y)]^j [1-F(y)]^(n-j) Where nCj = n!/j!(n-j)! 
When n is large we use the normal approximation for the K'th order statistic with k/n ->p the distribution is asymptotically normal with mean at x_p (the percentile) and variance 1/n*(p(1-p)/(f(x_p)^2)) x_p is the solution of p = F(x_p) when p = 0.5 x_p = 0.4549364 and f(0.4549364) = 0.4711363 */ double estimate_median_percentile(double median, int n){ int k; int j; double chisq_p; double percentile=0.0; double sd; if (n < 30){ /* using the formula for CDF above */ if (n%2 == 0){ k = n/2; } else { k = (n+1)/2; } chisq_p = pchisq(median,1,1,0); for (j=k; j <= n; j++){ percentile+= dbinom((double)j,(double)n,chisq_p,0); } } else { /* using the normal approximation */ sd = sqrt((double)1/(double)n*0.5*0.5/(0.4711363*0.4711363)); percentile = pnorm(median, 0.4549364, sd, 1, 0); } return percentile; } void R_estimate_median_percentile(double *median, int *n){ double percentile; percentile = estimate_median_percentile(*median, *n); *median = percentile; } /********************************************************************************** ** ** void determine_row_weights(double *resids, int y_rows, int y_cols, double *row_weights) ** ** double *resids - estimated residuals ** int y_rows - dimension of residuals matrix ** int y_cols - dimension of residuals matrix ** double *row_weights - on output will contain a weight (0-1) for row (pre-allocated. 
should be of length y_rows) ** ** ****************************************************************************************/ void determine_row_weights(double *resids, int y_rows, int y_cols, double *row_weights){ double *current_row = Calloc(y_cols,double); double scale; int n = y_rows*y_cols; int i, j; double row_med; double chisq; double znorm; /* First figure out what we need to standardize the residuals */ scale = med_abs(resids,n)/0.6745; for (i= 0; i < y_rows; i++){ for (j=0; j < y_cols; j++){ current_row[j] = (resids[j*y_rows + i]/scale)*(resids[j*y_rows + i]/scale); } row_med = median_nocopy(current_row, y_cols); /* figure out what quantile that this lies at on the distribution of the median of a sample from Chi-Sq(1) distribution */ chisq = estimate_median_percentile(row_med,y_cols); if (chisq > 0.5){ znorm = qnorm(chisq, 0.0, 1.0, 1, 0); row_weights[i] = psi_huber(znorm,1.345,0); if (row_weights[i] < 0.0001){ row_weights[i] = 0.0001; } } else { row_weights[i] = 1.0; } } Free(current_row); } /********************************************************************************** ** ** void determine_row_weights(double *resids, int y_rows, int y_cols, double *row_weights) ** ** double *resids - estimated residuals ** int y_rows - dimension of residuals matrix ** int y_cols - dimension of residuals matrix ** double *col_weights - on output will contain a weight (0-1) for each col (pre-allocated. 
should be of length y_cols) ** ** ****************************************************************************************/ void determine_col_weights(double *resids, int y_rows, int y_cols, double *col_weights){ double *current_col = Calloc(y_rows,double); double scale; int n = y_rows*y_cols; int i, j; double row_med; double chisq; double znorm; /* First figure out what we need to standardize the residuals */ scale = med_abs(resids,n)/0.6745; for (j=0; j < y_cols; j++){ for (i= 0; i < y_rows; i++){ current_col[i] = (resids[j*y_rows + i]/scale)*(resids[j*y_rows + i]/scale); } row_med = median_nocopy(current_col, y_rows); /* figure out what quantile that this lies at on the distribution of the median of a sample from Chi-Sq(1) distribution */ chisq = estimate_median_percentile(row_med,y_rows); if (chisq > 0.5){ znorm = qnorm(chisq, 0.0, 1.0, 1, 0); col_weights[j] = psi_huber(znorm,1.345,0); if (col_weights[j] < 0.0001){ col_weights[j] = 0.0001; } } else { col_weights[j] = 1.0; } } Free(current_col); } /* testing code X <- matrix(rnorm(100),10,10) .C("R_determine_row_weights",as.vector(X),as.integer(10),as.integer(10),double(10)) X <- matrix(rnorm(1000),100,10) .C("R_determine_col_weights",as.vector(X),as.integer(100),as.integer(10),double(10)) */ void R_determine_row_weights(double *resids, int *y_rows, int *y_cols, double *row_weights){ determine_row_weights(resids, *y_rows, *y_cols, row_weights); } void R_determine_col_weights(double *resids, int *y_rows, int *y_cols, double *col_weights){ determine_col_weights(resids, *y_rows, *y_cols, col_weights); } /********************************************************************************** ** ** void plmr_fit_core(double *y, int rows, int cols,double *out_beta, ** double *out_resids, double *out_weights, ** double (* PsiFn)(double, double, int), double psi_k,int max_iter, ** int initialized)) ** ** double *y - matrix of response variables (stored by column, with rows probes, columns chips ** int rows - dimensions of y 
** int cols - dimensions of y ** ** specializes procedure so decomposes matrix more efficiently ** note that routine is not as numerically stable as above. ** ** fits a row + columns model ** **********************************************************************************/ static void plmr_fit_core(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized, int rowrobust, int colrobust){ int i,j,iter; /* double tol = 1e-7; */ double acc = 1e-4; double scale =0.0; double conv; double endprobe; double *wts = out_weights; double *row_weights = Calloc(y_rows, double); double *col_weights = Calloc(y_cols, double); double *resids = out_resids; double *old_resids = Calloc(y_rows*y_cols,double); double *rowmeans = Calloc(y_rows,double); double *xtwx = Calloc((y_rows+y_cols-1)*(y_rows+y_cols-1),double); double *xtwy = Calloc((y_rows+y_cols),double); double sumweights, rows; rows = y_rows*y_cols; if (!initialized){ /* intially use equal weights */ for (i=0; i < rows; i++){ wts[i] = 1.0; } } /* starting matrix */ for (i=0; i < y_rows; i++){ for (j=0; j < y_cols; j++){ resids[j*y_rows + i] = y[j*y_rows + i]; } } /* sweep columns (ie chip effects) */ for (j=0; j < y_cols; j++){ out_beta[j] = 0.0; sumweights = 0.0; for (i=0; i < y_rows; i++){ out_beta[j] += wts[j*y_rows + i]* resids[j*y_rows + i]; sumweights += wts[j*y_rows + i]; } out_beta[j]/=sumweights; for (i=0; i < y_rows; i++){ resids[j*y_rows + i] = resids[j*y_rows + i] - out_beta[j]; } } /* sweep rows (ie probe effects) */ for (i=0; i < y_rows; i++){ rowmeans[i] = 0.0; sumweights = 0.0; for (j=0; j < y_cols; j++){ rowmeans[i] += wts[j*y_rows + i]* resids[j*y_rows + i]; sumweights += wts[j*y_rows + i]; } rowmeans[i]/=sumweights; for (j=0; j < y_cols; j++){ resids[j*y_rows + i] = resids[j*y_rows + i] - rowmeans[i]; } } for (i=0; i < y_rows-1; i++){ out_beta[i+y_cols] = rowmeans[i]; } if (!rowrobust){ for 
(i=0; i < y_rows; i++){ row_weights[i] = 1.0; } } if (!colrobust){ for (j=0; j < y_cols; j++){ col_weights[j] = 1.0; } } for (iter = 0; iter < max_iter; iter++){ scale = med_abs(resids,rows)/0.6745; if (fabs(scale) < 1e-10){ /*printf("Scale too small \n"); */ break; } for (i =0; i < rows; i++){ old_resids[i] = resids[i]; } /* weights for individual measurements */ for (i=0; i < rows; i++){ wts[i] = PsiFn(resids[i]/scale,psi_k,0); /* psi_huber(resids[i]/scale,k,0); */ } /* now determine row and column weights */ if (iter > 0){ if (rowrobust){ determine_row_weights(resids, y_rows, y_cols, row_weights); } if (colrobust){ determine_col_weights(resids, y_rows, y_cols, col_weights); } for (j= 0; j < y_cols; j++){ for (i = 0; i < y_rows; i++){ wts[j*y_rows + i] = wts[j*y_rows + i]*row_weights[i]*col_weights[j]; } } } /* weighted least squares */ memset(xtwx,0,(y_rows+y_cols-1)*(y_rows+y_cols-1)*sizeof(double)); XTWX(y_rows,y_cols,wts,xtwx); XTWXinv(y_rows, y_cols,xtwx); XTWY(y_rows, y_cols, wts,y, xtwy); for (i=0;i < y_rows+y_cols-1; i++){ out_beta[i] = 0.0; for (j=0;j < y_rows+y_cols -1; j++){ out_beta[i] += xtwx[j*(y_rows+y_cols -1)+i]*xtwy[j]; } } /* residuals */ for (i=0; i < y_rows-1; i++){ for (j=0; j < y_cols; j++){ resids[j*y_rows +i] = y[j*y_rows + i]- (out_beta[j] + out_beta[i + y_cols]); } } for (j=0; j < y_cols; j++){ endprobe=0.0; for (i=0; i < y_rows-1; i++){ endprobe+= out_beta[i + y_cols]; } resids[j*y_rows + y_rows-1] = y[j*y_rows + y_rows-1]- (out_beta[j] - endprobe); } /*check convergence based on residuals */ conv = irls_delta(old_resids,resids, rows); if (conv < acc){ /* printf("Converged \n");*/ break; } } /* order output in probes, samples order */ /* for (i=0;i < y_rows+y_cols-1; i++){ old_resids[i] = out_beta[i]; } for (i=0; i 0){ if (rowrobust){ determine_row_weights(resids, y_rows, y_cols, row_weights); } if (colrobust){ determine_col_weights(resids, y_rows, y_cols, col_weights); } for (j= 0; j < y_cols; j++){ for (i = 0; i < y_rows; i++){ 
wts[j*y_rows + i] = wts[j*y_rows + i]*row_weights[i]*col_weights[j]; } } } /* printf("%f\n",scale); */ /* weighted least squares */ memset(xtwx,0,(y_rows+y_cols-1)*(y_rows+y_cols-1)*sizeof(double)); XTWX(y_rows,y_cols,wts,xtwx); XTWXinv(y_rows, y_cols,xtwx); XTWY(y_rows, y_cols, wts,y, xtwy); for (i=0;i < y_rows+y_cols-1; i++){ out_beta[i] = 0.0; for (j=0;j < y_rows+y_cols -1; j++){ out_beta[i] += xtwx[j*(y_rows+y_cols -1)+i]*xtwy[j]; } } /* residuals */ for (i=0; i < y_rows-1; i++){ for (j=0; j < y_cols; j++){ resids[j*y_rows +i] = y[j*y_rows + i]- (out_beta[j] + out_beta[i + y_cols]); } } for (j=0; j < y_cols; j++){ endprobe=0.0; for (i=0; i < y_rows-1; i++){ endprobe+= out_beta[i + y_cols]; } resids[j*y_rows + y_rows-1] = y[j*y_rows + y_rows-1]- (out_beta[j] - endprobe); } /*check convergence based on residuals */ conv = irls_delta(old_resids,resids, rows); if (conv < acc){ /* printf("Converged \n");*/ break; } } /* order output in probes, samples order */ /* for (i=0;i < y_rows+y_cols-1; i++){ old_resids[i] = out_beta[i]; } for (i=0; i ** ** Created on May 31, 2003 ** ** History ** May 31, 2003 - Initial version.Move psi_huber() over from rlm.c ** add fair, Cauchy, Geman-McClure, Welsch and Tukey ** Jun 03, 2003 - add Andrews and some NOTES/WARNINGS. ** Jun 04, 2003 - a mechanism for selecting a psi function ** ********************************************************************/ /********************************************************************* ** ** NOTES: on the tuning constants ** ** Recommended values for the tuning constants (in general these are ** chosen for 95% asymptotic efficiency in the case of the normal) ** ** Huber - k = 1.345 ** Fair - k = 1.3998 ** Cauchy - k=2.3849 ** Welsch - k = 2.9846 ** Tukey Biweight - k = 4.6851 ** Andrews Sine - K = 1.339 ** ** Geman-McClure does not require a tuning constant. 
** ********************************************************************/ /********************************************************************* ** ** WARNINGS: ** ** Huber and Fair will converge to unique solutions. The others will ** not. It is recommended that if you use one of the other methods you ** first use a fully iterated huber or other robust method and then ** apply the chosen method. ** ** ********************************************************************/ #include "psi_fns.h" #include #include /********************************************************************* ** ** double psi_huber(double u, double k,int deriv) ** ** double u - data value ** double k - tuning constant ** int type - if 0 then return the evaluation of the weight function, if 1 returns the derivative ** other wise return psi itself ** ** This function computes Hubers suggested PSI function. ** *********************************************************************/ double psi_huber(double u, double k,int deriv){ if (deriv == 0){ if ( 1 < k/fabs(u)){ return 1.0; } else { return k/fabs(u); } } else if (deriv == 1){ if (fabs(u) <= k){ return 1.0; } else { return 0.0; } } else { if (fabs(u) <= k){ return u; } else { if (u < 0){ return -k; } else { return k; } } } } /********************************************************************* ** ** double psi_fair(double u, double k,int deriv) ** ** double u - data value ** double k - tuning constant ** int type - if 0 then return the evaluation of the weight function, if 1 returns the derivative ** other wise return psi itself ** ** This function computes "fair" PSI function ** *********************************************************************/ double psi_fair(double u, double k,int deriv){ if (deriv == 0){ return 1.0/(1.0+fabs(u)/k); } else if (deriv == 1){ if (u >=0){ return 1.0/(1.0+fabs(u)/k) - u/(k*(1.0+fabs(u)/k)*(1.0+fabs(u)/k)); } else { return 1.0/(1.0+fabs(u)/k) + u/(k*(1.0+fabs(u)/k)*(1.0+fabs(u)/k)); } } else { return 
u/(1.0+fabs(u)/k); } } /********************************************************************* ** ** double psi_fair(double u, double k,int deriv) ** ** double u - data value ** double k - tuning constant ** int type - if 0 then return the evaluation of the weight function, if 1 returns the derivative ** other wise return psi itself ** ** This function computes the cauchy PSI function ** *********************************************************************/ double psi_cauchy(double u, double k,int deriv){ if (deriv == 0){ return 1.0/(1.0+(u/k)*(u/k)); } else if (deriv == 1){ return k*k*(k*k - u*u)/((k*k+u*u)*(k*k+u*u)); } else { return u/(1.0+(u/k)*(u/k)); } } /********************************************************************* ** ** double psi_GemanMcClure(double u, double k,int deriv) ** ** double u - data value ** double k - tuning constant ** int type - if 0 then return the evaluation of the weight function, if 1 returns the derivative ** other wise return psi itself ** ** This function computes the Geman-McClure PSI function ** *********************************************************************/ double psi_GemanMcClure(double u, double k,int deriv){ if (deriv == 0){ return 1.0/((1.0 + u*u)*(1.0 + u*u)); } else if (deriv == 1){ return (1.0 - 3.0*u*u)/((1.0+u*u)*(1.0+u*u)*(1.0+u*u)); } else { return u/((1.0 + u*u)*(1.0 + u*u)); } } /********************************************************************* ** ** double psi_Welsch(double u, double k,int deriv) ** ** double u - data value ** double k - tuning constant ** int type - if 0 then return the evaluation of the weight function, if 1 returns the derivative ** other wise return psi itself ** ** This function computes the Welsch PSI function ** *********************************************************************/ double psi_Welsch(double u, double k,int deriv){ if (deriv == 0){ return exp(-(u/k)*(u/k)); } else if (deriv == 1){ return exp(-(u/k)*(u/k))*(1 - 2.0*(u*u)/(k*k)); } else { return 
u*exp(-(u/k)*(u/k)); } } /********************************************************************* ** ** double psi_Welsch(double u, double k,int deriv) ** ** double u - data value ** double k - tuning constant ** int type - if 0 then return the evaluation of the weight function, if 1 returns the derivative ** other wise return psi itself ** ** This function computes the Welsch PSI function ** *********************************************************************/ double psi_Tukey(double u, double k,int deriv){ if (deriv == 0){ if (fabs(u) <= k){ return pow((1.0 - (u/k)*(u/k)),2.0); } else { return 0; } } else if (deriv == 1){ if (fabs(u) <= k){ return (1.0 - (u/k)*(u/k))*(1.0-5.0*(u/k)*(u/k)); } else { return 0; } } else { if (fabs(u) <= k){ return u*(1.0 - (u/k)*(u/k))* (1.0 - (u/k)*(u/k)); } else { return 0; } } } /********************************************************************* ** ** double psi_Andrews(double u, double k,int deriv) ** ** double u - data value ** double k - tuning constant ** int type - if 0 then return the evaluation of the weight function, if 1 returns the derivative ** other wise return psi itself ** ** This function computes the Andrews PSI function ** *********************************************************************/ double psi_Andrews(double u, double k,int deriv){ if (deriv == 0){ if (fabs(u) <= k*M_PI){ return sin(u/k)/(u/k); } else { return 0; } } else if (deriv == 1){ if (fabs(u) <= k*M_PI){ return cos(u/k); } else { return 0; } } else { if (fabs(u) <= k*M_PI){ return k*sin(u/k); } else { return 0; } } } pt2psi psifuncArr[7]; pt2psi PsiFunc(int code){ psifuncArr[0] = &psi_huber; psifuncArr[1] = &psi_fair; psifuncArr[2] = &psi_cauchy; psifuncArr[3] = &psi_GemanMcClure; psifuncArr[4] = &psi_Welsch; psifuncArr[5] = &psi_Tukey; psifuncArr[6] = &psi_Andrews; return psifuncArr[code]; } preprocessCore/src/psi_fns.h0000644000126300012640000000075712127220006017535 0ustar00biocbuildphs_compbio#ifndef PSI_FNS_H #define PSI_FNS_H double 
psi_huber(double u, double k,int deriv); double psi_fair(double u, double k,int deriv); double psi_cauchy(double u, double k,int deriv); double psi_GemanMcClure(double u, double k,int deriv); double psi_Welsch(double u, double k,int deriv); double psi_Tukey(double u, double k,int deriv); double psi_Andrews(double u, double k,int deriv); typedef double (*pt2psi)(double , double , int); pt2psi PsiFunc(int code); int psi_code(char *Name); #endif preprocessCore/src/qnorm.c0000644000126300012640000025154512127220006017226 0ustar00biocbuildphs_compbio/********************************************************** ** ** file: qnorm.c ** ** aim: A c implementation of the quantile normalization method ** ** Copyright (C) 2002-2008 Ben Bolstad ** ** written by: B. M. Bolstad ** ** written: Feb 2, 2002 ** last modified: Nov 19, 2008 ** ** This c code implements the quantile normalization method ** for normalizing high density oligonucleotide data as discussed ** in ** ** Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003)(2003) ** A Comparison of Normalization Methods for High ** Density Oligonucleotide Array Data Based on Bias and Variance. ** Bioinformatics 19,2,pp 185-193 ** ** History ** Feb 2, 2002 - Intial c code version from original R code ** Apr 19, 2002 - Update to deal more correctly with ties (equal rank) ** Jan 2, 2003 - Documentation/Commenting updates reformating ** Feb 17, 2003 - add in a free(datvec) to qnorm(). clean up freeing of dimat ** Feb 25, 2003 - try to reduce or eliminate compiler warnings (with gcc -Wall) ** Feb 28, 2003 - update reference to normalization paper in comments ** Mar 25, 2003 - ability to use median, rather than mean in so called "robust" method ** Aug 23, 2003 - add ability to do normalization on log scale in "robust" method. ** also have added .Call() interface c functions which may be called ** now from R as alterative to traditonal means. 
** Fixed a bug where use_median was not being dereferenced in "robust method" ** Oct 7, 2003 - fix a bug with length is qnorm_robust ** Mar 6, 2004 - change malloc/free pairs to Calloc/Free ** Mar 3, 2005 - port across the low memory quantile normalization from RMAExpress (and make it the new qnorm_c (previous version made qnorm_c_old) ** Mar 12, 2006 - make some internal functions static ** Mar 13, 2006 - re-working of the "robust" quantile normalizer. The old function is ** still here with a _old added to the name. Also now ** have a .Call() interface for the robust method ** Apr 27-28, 2006 - Add C level functionality for determining which outliers ** to exclude for the "robust" quantile normalizer. ** Jun 2, 2006 - Add a quantile normalization function that accepts a target ** distribution. Improve/add a few comments ** Jun 4, 2006 - Add a .Call interface for target based quantile normalization. ** Add a function for determing target distribution. ** Jun 5, 2006 - Re-organize code blocks ** Add normalization within blocks functions ** Jun 9, 2006 - change nearbyint to floor(x +0.5) (to fix problems on Sparc Solaris builds) ** Aug 1, 2006 - fix bug in determining/applying target ** some changes in how quantiles are estimated in determining/applyin target ** Oct 26, 2006 - fix unbalanced UNPROTECT in use_target. ** Nov 13, 2006 - remove median code ** May 20, 2007 - move to preprocessCore. clean up code. 
** May 26, 2007 - fix memory leak in qnorm_c_determine_target ** Jul 12, 2007 - improved ties handling (fixes off by "half" error which affects even numbers of ties) ** Jul 14, 2007 - add NA handling to qnorm_c_using_target and qnorm_c_determine_target ** Oct 6, 2007 - initial pthreads support for qnorm_c supplied by Paul Gordon ** Oct 9, 2007 - modify how columns are partioned to threads (when less columns than threads) ** Mar 14, 2008 - multithreaded qnorm_c_determine_target based on pthreads ** Mar 15, 2008 - multithreaded qnorm_c_using_target based on pthreads ** Jul 31, 2008 - Fix memory leak in use_target ** Aug 1, 2008 - Fix memory leak in determine_target ** Nov 19, 2008 - add *_via_subset code ** Jan 15, 2009 - fix VECTOR_ELT/STRING_ELT issues ** Dec 1, 2010 - change how PTHREAD_STACK_MIN is used ** Jan 5, 2011 - use_target issue when target distribution length != nrow(x) fixed ** ***********************************************************/ /***************************************************************************************************** ***************************************************************************************************** ** ** GENERAL NOTE: Many of the functions take pointers for arguements that are essentially just ** int's. This is mostly legacy for when the functions were called via .C() in R rather ** than via the .Call() interface. 
** ***************************************************************************************************** *****************************************************************************************************/ #include #include #include #include "rma_common.h" #include "qnorm.h" #include #include #include #include #ifdef USE_PTHREADS #include #include #include #define THREADS_ENV_VAR "R_THREADS" pthread_mutex_t mutex_R; struct loop_data{ double *data; double *row_mean; int *rows; int *cols; int *row_meanlength; int *in_subset; int start_col; int end_col; }; #endif /***************************************************************************************************** ***************************************************************************************************** ** ** This section defines utility functions and data types ** ** ***************************************************************************************************** *****************************************************************************************************/ /************************************************************* ** ** the dataitem record is used to keep track of data indicies ** along with data value when sorting and unsorting in the ** quantile algorithm. 
** ************************************************************/ typedef struct{ double data; int rank; } dataitem; /************************************************************* ** ** the dataitem_block record is used to keep track of data indicies ** along with data value when sorting and unsorting in the ** quantile algorithm in blocks ** ************************************************************/ typedef struct{ double data; int rank; int block; } dataitem_block; /*********************************************************** ** ** int min(int x1, int x2) ** ** returns the minimum of x1 and x2 ** **********************************************************/ /* static int min(int x1,int x2){ * if (x1 > x2) * return x2; * else * return x1; *} */ /********************************************************** ** ** int sort_fn(const void *a1,const void *a2) ** ** a comparison function for sorting objects of the dataitem type. ** ** **********************************************************/ static int sort_fn(const void *a1,const void *a2){ dataitem *s1, *s2; s1 = (dataitem *)a1; s2 = (dataitem *)a2; if (s1->data < s2->data) return (-1); if (s1 ->data > s2->data) return (1); return 0; } /********************************************************** ** ** int sort_fn_blocks(const void *a1,const void *a2) ** ** a comparison function for sorting objects of the dataitem_blocks type. 
** ** **********************************************************/ static int sort_fn_blocks(const void *a1,const void *a2){ dataitem_block *s1, *s2; s1 = (dataitem_block *)a1; s2 = (dataitem_block *)a2; if (s1->block < s2->block){ return (-1); } else if (s1->block > s2->block){ return (1); } else { if (s1->data < s2->data) return (-1); if (s1 ->data > s2->data) return (1); return 0; } } /************************************************************ ** ** dataitem **get_di_matrix(double *data, int rows, int cols) ** ** given data form a matrix of dataitems, each element of ** matrix holds datavalue and original index so that ** normalized data values can be resorted to the original order ** ***********************************************************/ static dataitem **get_di_matrix(double *data, int rows, int cols){ int i,j; dataitem **dimat; /* dataitem *xtmp; */ dimat = (dataitem **)Calloc((cols),dataitem *); if (dimat == NULL){ printf("\nERROR - Sorry the normalization routine could not allocate adequate memory\n You probably need more memory to work with a dataset this large\n"); } /* xtmp = malloc(cols*rows*sizeof(dataitem)); for (j=0; j < cols; j++, xtmp +=rows) dimat[j] = xtmp; */ for (j=0; j < cols; j++) dimat[j] = Calloc(rows,dataitem); for (j =0; j < cols; j++) for (i =0; i < rows; i++){ dimat[j][i].data = data[j*rows + i]; dimat[j][i].rank = i; } return(dimat); } /************************************************************ ** ** double *get_ranks(dataitem *x,int n) ** ** get ranks in the same manner as R does. 
Assume that *x is ** already sorted ** *************************************************************/ static void get_ranks(double *rank, dataitem *x,int n){ int i,j,k; i = 0; while (i < n) { j = i; while ((j < n - 1) && (x[j].data == x[j + 1].data)) j++; if (i != j) { for (k = i; k <= j; k++) rank[k] = (i + j + 2) / 2.0; } else rank[i] = i + 1; i = j + 1; } /*return rank;*/ } /************************************************************ ** ** double *get_ranks_blocks(dataitem *x,int n) ** ** get ranks in the same manner as R does. Assume that *x is ** already sorted ** *************************************************************/ static void get_ranks_blocks(double *rank, dataitem_block *x,int n){ int i,j,k; i = 0; while (i < n) { j = i; while ((j < n - 1) && (x[j].data == x[j + 1].data) && (x[j].block == x[j + 1].block)) j++; if (i != j) { for (k = i; k <= j; k++) rank[k] = (i + j + 2) / 2.0; } else rank[i] = i + 1; i = j + 1; } /*return rank;*/ } /************************************************************************* ** ** static double weights_huber(double u, double k) ** ** double u - standardized residuals ** doubke k - tuning parameter ** ** Used to get weights for M-estimation. 
** *************************************************************************/ static double weights_huber(double u, double k){ if ( 1 < k/fabs(u)){ return 1.0; } else { return k/fabs(u); } } /************************************************************************** ** ** static double med_abs(double *x, int length) ** ** double *x - a data vector ** int length - length of x ** ** Compute the median absolute value of a data vector ** *************************************************************************/ static double med_abs(double *x, int length){ int i; double med_abs; double *buffer = Calloc(length,double); for (i = 0; i < length; i++) buffer[i] = fabs(x[i]); med_abs = median_nocopy(buffer,length); Free(buffer); return(med_abs); } /***************************************************************************************************** ***************************************************************************************************** ** ** The following block implements the standard quantile normalization function (aka "classic") ** ** ***************************************************************************************************** *****************************************************************************************************/ void normalize_determine_target(double *data, double *row_mean, int *rows, int *cols, int start_col, int end_col){ int i, j; double *datvec = (double *)Calloc((*rows),double); #ifdef USE_PTHREADS long double *row_submean = (long double *)Calloc((*rows), long double); for (i =0; i < *rows; i++){ row_submean[i] = 0.0; } #endif for (j = start_col; j <= end_col; j++){ /* first find the normalizing distribution */ for (i = 0; i < *rows; i++){ datvec[i] = data[j*(*rows) + i]; } qsort(datvec,*rows,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i = 0; i < *rows; i++){ #ifdef USE_PTHREADS row_submean[i] += datvec[i]; #else row_mean[i] += datvec[i]/((double)*cols); #endif } } Free(datvec); #ifdef USE_PTHREADS /* add 
to the global running total, will do the division after all threads finish (for precision of the result) */ pthread_mutex_lock (&mutex_R); for (i = 0; i < *rows; i++){ row_mean[i] += (double) row_submean[i]; } pthread_mutex_unlock (&mutex_R); Free(row_submean); #endif } void normalize_distribute_target(double *data, double *row_mean, int *rows, int *cols, int start_col, int end_col){ int i, j, ind; dataitem **dimat; double *ranks = (double *)Calloc((*rows),double); dimat = (dataitem **)Calloc(1,dataitem *); dimat[0] = (dataitem *)Calloc(*rows,dataitem); for (j = start_col; j <= end_col; j++){ for (i = 0; i < *rows; i++){ dimat[0][i].data = data[j*(*rows) + i]; dimat[0][i].rank = i; } qsort(dimat[0],*rows,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],*rows); for (i = 0; i < *rows; i++){ ind = dimat[0][i].rank; if (ranks[i] - floor(ranks[i]) > 0.4){ data[j*(*rows) +ind] = 0.5*(row_mean[(int)floor(ranks[i])-1] + row_mean[(int)floor(ranks[i])]); } else { data[j*(*rows) +ind] = row_mean[(int)floor(ranks[i])-1]; } } } Free(ranks); Free(dimat[0]); Free(dimat); } #ifdef USE_PTHREADS void *normalize_group(void *data){ struct loop_data *args = (struct loop_data *) data; normalize_determine_target(args->data, args->row_mean, args->rows, args->cols, args->start_col, args->end_col); } void *distribute_group(void *data){ struct loop_data *args = (struct loop_data *) data; normalize_distribute_target(args->data, args->row_mean, args->rows, args->cols, args->start_col, args->end_col); } #endif /********************************************************* ** ** void qnorm_c(double *data, int *rows, int *cols) ** ** this is the function that actually implements the ** quantile normalization algorithm. It is called from R. 
** ** returns 1 if there is a problem, 0 otherwise ** ** Note that this function does not handle missing data (ie NA) ** ********************************************************/ int qnorm_c(double *data, int *rows, int *cols){ int i; double *row_mean = (double *)Calloc((*rows),double); #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif for (i =0; i < *rows; i++){ row_mean[i] = 0.0; } #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of columns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of columns is less than the number of threads */ if (num_threads < *cols){ chunk_size = *cols/num_threads; chunk_size_d = ((double) *cols)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((*cols < num_threads ? 
*cols : num_threads), struct loop_data); args[0].data = data; args[0].row_mean = row_mean; args[0].rows = rows; args[0].cols = cols; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < *cols; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_col = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_col = i+chunk_size; i++; } else{ args[t].end_col = i+chunk_size-1; } t++; } /* Determining the quantile normalization target distribution */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, normalize_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } /* When in threaded mode, row_mean is the sum, waiting for a final division here, to maintain precision */ for (i = 0; i < *rows; i++){ row_mean[i] /= (double)*cols; } /* now assign back the target normalization distribution to a given set of columns */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, distribute_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); 
pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else normalize_determine_target(data, row_mean, rows, cols, 0, *cols-1); normalize_distribute_target(data, row_mean, rows, cols, 0, *cols-1); #endif Free(row_mean); return 0; } /********************************************************* ** ** SEXP R_qnorm_c(SEXP X) ** ** SEXP X - a matrix ** SEXP copy - a flag if TRUE then make copy ** before normalizing, if FALSE work in place ** note that this can be dangerous since ** it will change the original matrix. ** ** returns a quantile normalized matrix. ** ** This is a .Call() interface for quantile normalization ** *********************************************************/ SEXP R_qnorm_c(SEXP X, SEXP copy){ SEXP Xcopy,dim1; double *Xptr; int rows,cols; PROTECT(dim1 = getAttrib(X,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; if (asInteger(copy)){ PROTECT(Xcopy = allocMatrix(REALSXP,rows,cols)); copyMatrix(Xcopy,X,0); } else { Xcopy = X; } Xptr = NUMERIC_POINTER(AS_NUMERIC(Xcopy)); qnorm_c(Xptr, &rows, &cols); if (asInteger(copy)){ UNPROTECT(2); } else { UNPROTECT(1); } return Xcopy; } /***************************************************************************************************** ***************************************************************************************************** ** ** The following block of code provides the "robust" quantile normalization. 
In addition it tries to ** give the equivalent to the R code functionality for selecting arrays to remove before determining ** ** ***************************************************************************************************** *****************************************************************************************************/ /********************************************************* ** ** void qnorm_robust_c(double *data,double *weights, int *rows, int *cols, int *use_median,int *use_log2,int *weight_scheme) ** ** double *data ** double *weights ** int *rows ** int *cols ** int *use_median ** int *use_log2 ** int *weight_scheme ** ** This function implements the "robust" quantile normalizer ** ** Note that this function does not handle NA values. ** ********************************************************/ int qnorm_robust_c(double *data,double *weights, int *rows, int *cols, int *use_median, int *use_log2, int *weight_scheme){ int i,j,ind,rep; int half,length; dataitem **dimat; double *row_mean = (double *)Calloc((*rows),double); double *datvec=0; /* = (double *)Calloc(*cols,double); */ double *ranks = (double *)Calloc((*rows),double); double sum_weights = 0.0; double mean, scale; /* used in M-estimation routine */ for (i =0; i < *rows; i++){ row_mean[i] = 0.0; } if ((*weight_scheme == 0) && !(*use_median)){ datvec = (double *)Calloc(*rows,double); if (!(*use_log2)){ for (j = 0; j < *cols; j++){ sum_weights+=weights[j]; } for (j = 0; j < *cols; j++){ for (i =0; i < *rows; i++){ datvec[i] = data[j*(*rows) + i]; } qsort(datvec,*rows,sizeof(double),(int(*)(const void*, const void*))sort_double); if (weights[j] > 0.0){ for (i =0; i < *rows; i++){ row_mean[i] += weights[j]*datvec[i]/sum_weights; } } } } else { for (j = 0; j < *cols; j++){ sum_weights+=weights[j]; } for (j = 0; j < *cols; j++){ for (i =0; i < *rows; i++){ datvec[i] = data[j*(*rows) + i]; } qsort(datvec,*rows,sizeof(double),(int(*)(const void*, const void*))sort_double); if (weights[j] > 0.0){ 
for (i =0; i < *rows; i++){ row_mean[i] += weights[j]*(log(datvec[i])/log(2.0))/sum_weights; } } } for (i =0; i < *rows; i++){ row_mean[i] = pow(2.0,row_mean[i]); } } } else if ((*weight_scheme == 1) && !(*use_median)){ /** row-wise huber weights **/ dimat = get_di_matrix(data, *rows, *cols); datvec = Calloc(*cols,double); for (j=0; j < *cols; j++){ qsort(dimat[j],*rows,sizeof(dataitem),sort_fn); } if (!(*use_log2)){ for (i=0; i < *rows; i++){ for (j=0; j < *cols; j++) datvec[j] = dimat[j][i].data; /* five step huber estimate of location */ mean = 0.0; for (j=0; j < *cols; j++){ mean += datvec[j]/(double)(*cols); } for (rep = 0; rep < 5; rep++){ for (j=0; j < *cols; j++){ datvec[j] = datvec[j] - mean; } scale = med_abs(datvec,*cols)/0.6745; if (scale == 0.0){ break; } for (j=0; j < *cols; j++){ datvec[j] = (datvec[j] - mean)/scale; } mean = 0.0; sum_weights=0.0; for (j=0; j < *cols; j++){ mean+= weights_huber(datvec[j],1.345) * dimat[j][i].data; sum_weights+=weights_huber(datvec[j],1.345); } mean/=sum_weights; for (j=0; j < *cols; j++) datvec[j] = dimat[j][i].data; /* Rprintf("rep %d %f %f\n",rep,mean,scale); */ } row_mean[i] = mean; } } else { for (i=0; i < *rows; i++){ for (j=0; j < *cols; j++) datvec[j] = log(dimat[j][i].data)/log(2.0); /* five step huber estimate of location */ mean = 0.0; for (j=0; j < *cols; j++){ mean += datvec[j]/(double)(*cols); } for (rep = 0; rep < 5; rep++){ for (j=0; j < *cols; j++){ datvec[j] = datvec[j] - mean; } scale = med_abs(datvec,*cols)/0.6745; if (scale == 0.0){ break; } for (j=0; j < *cols; j++){ datvec[j] = (datvec[j] - mean)/scale; } mean = 0.0; sum_weights=0.0; for (j=0; j < *cols; j++){ mean+= weights_huber(datvec[j],1.345) * log(dimat[j][i].data)/log(2.0); sum_weights+=weights_huber(datvec[j],1.345); } mean/=sum_weights; for (j=0; j < *cols; j++) datvec[j] = log(dimat[j][i].data)/log(2.0); /* Rprintf("rep %d %f %f\n",rep,mean,scale); */ } row_mean[i] = pow(2.0,mean); } } for (j=0; j < *cols; j++){ Free(dimat[j]); } 
Free(dimat); } else if ((*use_median)){ dimat = get_di_matrix(data, *rows, *cols); datvec = Calloc(*cols,double); for (j=0; j < *cols; j++){ qsort(dimat[j],*rows,sizeof(dataitem),sort_fn); } for (i=0; i < *rows; i++){ for (j=0; j < *cols; j++) datvec[j] = dimat[j][i].data; qsort(datvec,*cols,sizeof(double),(int(*)(const void*, const void*))sort_double); half = (*cols + 1)/2; length = *cols; if (length % 2 == 1){ row_mean[i] = datvec[half - 1]; } else { row_mean[i] = (datvec[half] + datvec[half-1])/2.0; } } for (j=0; j < *cols; j++){ Free(dimat[j]); } Free(dimat); } else { error("Not sure that these inputs are recognised for the robust quantile normalization routine.\n"); } /* now assign back distribution */ dimat = (dataitem **)Calloc(1,dataitem *); dimat[0] = (dataitem *)Calloc(*rows,dataitem); for (j = 0; j < *cols; j++){ for (i =0; i < *rows; i++){ dimat[0][i].data = data[j*(*rows) + i]; dimat[0][i].rank = i; } qsort(dimat[0],*rows,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],*rows); for (i =0; i < *rows; i++){ ind = dimat[0][i].rank; if (ranks[i] - floor(ranks[i]) > 0.4){ data[j*(*rows) +ind] = 0.5*(row_mean[(int)floor(ranks[i])-1] + row_mean[(int)floor(ranks[i])]); } else { data[j*(*rows) +ind] = row_mean[(int)floor(ranks[i])-1]; } } } Free(ranks); Free(datvec); Free(dimat[0]); Free(dimat); Free(row_mean); return 0; } /********************************************************* ** ** SEXP R_qnorm_robust_c(SEXP X) ** ** SEXP X - a matrix ** SEXP copy - a flag if TRUE then make copy ** before normalizing, if FALSE work in place ** note that this can be dangerous since ** it will change the original matrix. ** ** returns a quantile normalized matrix. 
**
** This is a .Call() interface for quantile normalization (of the robust variety)
**
*********************************************************/

SEXP R_qnorm_robust_c(SEXP X, SEXP copy, SEXP R_weights, SEXP R_use_median, SEXP R_use_log2, SEXP R_weight_scheme){

  SEXP Xcopy, dim1;
  double *Xptr;
  double *weights;
  int rows, cols;
  int use_median, use_log2, weight_scheme;
  int make_copy = asInteger(copy);

  /* Pull the matrix dimensions off the dim attribute of X */
  PROTECT(dim1 = getAttrib(X, R_DimSymbol));
  rows = INTEGER(dim1)[0];
  cols = INTEGER(dim1)[1];

  /* Either normalize a freshly allocated copy or work destructively in place */
  if (make_copy){
    PROTECT(Xcopy = allocMatrix(REALSXP, rows, cols));
    copyMatrix(Xcopy, X, 0);
  } else {
    Xcopy = X;
  }

  Xptr    = NUMERIC_POINTER(AS_NUMERIC(Xcopy));
  weights = NUMERIC_POINTER(AS_NUMERIC(R_weights));

  use_median    = INTEGER(R_use_median)[0];
  use_log2      = INTEGER(R_use_log2)[0];
  weight_scheme = INTEGER(R_weight_scheme)[0];

  /* Hand off to the C-level robust quantile normalization */
  qnorm_robust_c(Xptr, weights, &rows, &cols, &use_median, &use_log2, &weight_scheme);

  /* One extra PROTECT was used when a copy was allocated */
  if (make_copy){
    UNPROTECT(2);
  } else {
    UNPROTECT(1);
  }
  return Xcopy;
}

/*****************************************************************
**
** static double compute_var(double *x, int length)
**
** double *x   - data vector
** int length  - length of x
**
** Returns the sample variance of x (divisor length - 1).
**
*****************************************************************/

static double compute_var(double *x, int length){

  double mean = 0.0;
  double ss = 0.0;
  int k;

  for (k = 0; k < length; k++){
    mean += x[k];
  }
  mean = mean/(double)length;

  for (k = 0; k < length; k++){
    ss += (x[k] - mean)*(x[k] - mean);
  }

  return(ss/(double)(length - 1));
}

/*****************************************************************
**
** static double compute_means(double *x, int length)
**
** double *x   - data vector
** int length  - length of x
**
** Returns the sample mean of x.
**
*****************************************************************/

static double compute_means(double *x, int length){

  double total = 0.0;
  int k;

  for (k = 0; k < length; k++){
    total += x[k];
  }

  return(total/(double)length);
}
/***************************************************************** ** ** static void remove_order_variance(double *x, int rows, int cols, int n_remove, double *weights) ** ** double *x ** int rows ** int cols ** int n_remove ** double *weights ** *****************************************************************/ static void remove_order_variance(double *x, int rows, int cols, int n_remove, double *weights){ double *vars = Calloc(cols,double); double *vars_row = Calloc(cols,double); double *vars_col = Calloc(cols,double); double *results = Calloc(cols*cols,double); int i,j; for (j=0; j < cols; j++){ vars[j] = compute_var(&x[j*rows],rows); } for (i = 0; i < cols -1; i++){ for (j = i+1; j < cols; j++){ results[j*cols + i] = vars[i]/vars[j]; results[i*cols + j] = vars[j]/vars[i]; } } for (i = 0; i < cols; i++){ vars_row[i] = 0.0; for (j=0; j < cols; j++){ vars_row[i]+=results[j*cols + i]; } } for (j = 0; j < cols; j++){ vars_col[j] = 0.0; for (i=0; i < cols; i++){ vars_col[j]+=results[j*cols + i]; } } for (j=0; j < cols; j++){ vars_row[j] = vars[j] = vars_row[j] + vars_col[j]; } qsort(vars_row,cols,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i=cols-1; i >= cols - n_remove; i--){ for (j=0; j < cols; j++){ if (vars[j] == vars_row[i]){ weights[j] =0.0; break; } } } Free(results); Free(vars); Free(vars_row); Free(vars_col); } /***************************************************************** ** ** static void remove_order_mean(double *x, int rows, int cols, int n_remove, double *weights) ** ** double *x ** int rows ** int cols ** int n_remove ** double *weights ** *****************************************************************/ static void remove_order_mean(double *x, int rows, int cols, int n_remove, double *weights){ double *means = Calloc(cols,double); double *means_row = Calloc(cols,double); double *means_col = Calloc(cols,double); double *results = Calloc(cols*cols,double); int i,j; for (j=0; j < cols; j++){ means[j] = 
compute_means(&x[j*rows],rows); } for (i = 0; i < cols -1; i++){ for (j = i+1; j < cols; j++){ results[j*cols + i] = means[i] - means[j]; results[i*cols + j] = means[j]- means[i]; } } for (j = 0; j < cols; j++){ means_col[j] = 0.0; for (i=0; i < cols; i++){ means_col[j]+=results[j*cols + i]; } } for (j=0; j < cols; j++){ means_row[j] = means[j] = fabs(means_col[j]); } qsort(means_row,cols,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i=cols-1; i >= cols - n_remove; i--){ for (j=0; j < cols; j++){ if (means[j] == means_row[i]){ weights[j] =0.0; break; } } } Free(results); Free(means); Free(means_row); Free(means_col); } /***************************************************************** ** ** static void remove_order_both(double *x, int rows, int cols, int n_remove, double *weights) ** ** double *x ** int rows ** int cols ** int n_remove ** double *weights ** *****************************************************************/ static void remove_order_both(double *x, int rows, int cols, int n_remove, double *weights){ double *means = Calloc(cols,double); double *means_row = Calloc(cols,double); double *means_col = Calloc(cols,double); double *vars = Calloc(cols,double); double *vars_row = Calloc(cols,double); double *vars_col = Calloc(cols,double); double *results = Calloc(cols*cols,double); int i,j; int n_remove_mean; int n_remove_var; if (n_remove % 2 ==0){ n_remove_var = n_remove/2; n_remove_mean = n_remove/2; } else { n_remove_var = n_remove/2 + 1; n_remove_mean = n_remove/2; } /* Work out all the stuff for excluding means */ for (j=0; j < cols; j++){ means[j] = compute_means(&x[j*rows],rows); } for (i = 0; i < cols -1; i++){ for (j = i+1; j < cols; j++){ results[j*cols + i] = means[i] - means[j]; results[i*cols + j] = means[j]- means[i]; } } for (j = 0; j < cols; j++){ means_col[j] = 0.0; for (i=0; i < cols; i++){ means_col[j]+=results[j*cols + i]; } } for (j=0; j < cols; j++){ means_row[j] = means[j] = fabs(means_col[j]); } 
qsort(means_row,cols,sizeof(double),(int(*)(const void*, const void*))sort_double); /* Work out all the stuff for excluding variances */ for (j=0; j < cols; j++){ vars[j] = compute_var(&x[j*rows],rows); } for (i = 0; i < cols -1; i++){ for (j = i+1; j < cols; j++){ results[j*cols + i] = vars[i]/vars[j]; results[i*cols + j] = vars[j]/vars[i]; } } for (i = 0; i < cols; i++){ vars_row[i] = 0.0; for (j=0; j < cols; j++){ vars_row[i]+=results[j*cols + i]; } } for (j = 0; j < cols; j++){ vars_col[j] = 0.0; for (i=0; i < cols; i++){ vars_col[j]+=results[j*cols + i]; } } for (j=0; j < cols; j++){ vars_row[j] = vars[j] = vars_row[j] + vars_col[j]; } qsort(vars_row,cols,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i=cols-1; i >= cols - n_remove_var; i--){ for (j=0; j < cols; j++){ if (vars[j] == vars_row[i]){ weights[j] =0.0; break; } } } for (i=cols-1; i >= cols - n_remove_mean; i--){ for (j=0; j < cols; j++){ if (means[j] == means_row[i]){ if (weights[j] ==0.0){ /* means it has already been excluded by variance rule. 
So need to look one more along */ n_remove_mean+=1; } else { weights[j] =0.0; break; } } } } } SEXP R_qnorm_robust_weights(SEXP X, SEXP remove_extreme, SEXP n_remove){ SEXP weights,dim1; int rows, cols; int j; PROTECT(dim1 = getAttrib(X,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; PROTECT(weights = allocVector(REALSXP,cols)); for (j=0; j < cols; j++){ REAL(weights)[j] = 1.0; } if (strcmp(CHAR(STRING_ELT(remove_extreme,0)),"variance") == 0){ remove_order_variance(REAL(X), rows, cols, INTEGER(n_remove)[0], REAL(weights)); } if (strcmp(CHAR(STRING_ELT(remove_extreme,0)),"mean") == 0){ remove_order_mean(REAL(X), rows, cols, INTEGER(n_remove)[0], REAL(weights)); } if (strcmp(CHAR(STRING_ELT(remove_extreme,0)),"both") == 0){ remove_order_both(REAL(X), rows, cols, INTEGER(n_remove)[0], REAL(weights)); } UNPROTECT(2); return weights; } /***************************************************************************************************** ***************************************************************************************************** ** ** The following block of code provides quantile normalization where a specified target vector is given. 
** In addition it deals with cases of un equal length by estimating the appropriate quantiles ** ***************************************************************************************************** *****************************************************************************************************/ void using_target(double *data, int *rows, int *cols, double *target, int *targetrows, int start_col, int end_col){ int i,j,ind,target_ind; dataitem **dimat; double *row_mean = target; double *ranks = (double *)Calloc((*rows),double); double samplepercentile; double target_ind_double,target_ind_double_floor; int targetnon_na = *targetrows; int non_na = 0; if (*rows == targetnon_na){ /* now assign back distribution */ /* this is basically the standard story */ dimat = (dataitem **)Calloc(1,dataitem *); dimat[0] = (dataitem *)Calloc(*rows,dataitem); for (j = start_col; j <= end_col; j++){ non_na = 0; for (i =0; i < *rows; i++){ if (ISNA(data[j*(*rows) + i])){ } else { dimat[0][non_na].data = data[j*(*rows) + i]; dimat[0][non_na].rank = i; non_na++; } } if (non_na == *rows){ qsort(dimat[0],*rows,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],*rows); for (i =0; i < *rows; i++){ ind = dimat[0][i].rank; if (ranks[i] - floor(ranks[i]) > 0.4){ data[j*(*rows) +ind] = 0.5*(row_mean[(int)floor(ranks[i])-1] + row_mean[(int)floor(ranks[i])]); } else { data[j*(*rows) +ind] = row_mean[(int)floor(ranks[i])-1]; } } } else { /* we are going to have to estimate the quantiles */ qsort(dimat[0],non_na,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],non_na); for (i =0; i < non_na; i++){ samplepercentile = (double)(ranks[i] - 1)/(double)(non_na-1); /* target_ind_double = 1.0/3.0 + ((double)(*targetrows) + 1.0/3.0) * samplepercentile; */ target_ind_double = 1.0 + ((double)(targetnon_na) - 1.0) * samplepercentile; target_ind_double_floor = floor(target_ind_double + 4*DOUBLE_EPS); target_ind_double = target_ind_double - target_ind_double_floor; if (fabs(target_ind_double) <= 
4*DOUBLE_EPS){ target_ind_double = 0.0; } if (target_ind_double == 0.0){ target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else if (target_ind_double == 1.0){ target_ind = (int)floor(target_ind_double_floor + 1.5); /* (int)nearbyint(target_ind_double_floor + 1.0); */ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else { target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; if ((target_ind < *targetrows) && (target_ind > 0)){ data[j*(*rows) +ind] = (1.0- target_ind_double)*row_mean[target_ind-1] + target_ind_double*row_mean[target_ind]; } else if (target_ind >= *targetrows){ data[j*(*rows) +ind] = row_mean[*targetrows-1]; } else { data[j*(*rows) +ind] = row_mean[0]; } } } } } } else { /** the length of the target distribution and the size of the data matrix differ **/ /** need to estimate quantiles **/ dimat = (dataitem **)Calloc(1,dataitem *); dimat[0] = (dataitem *)Calloc(*rows,dataitem); for (j = start_col; j <= end_col; j++){ non_na = 0; for (i =0; i < *rows; i++){ if (ISNA(data[j*(*rows) + i])){ } else { dimat[0][non_na].data = data[j*(*rows) + i]; dimat[0][non_na].rank = i; non_na++; } } qsort(dimat[0],non_na,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],non_na); for (i =0; i < non_na; i++){ samplepercentile = (double)(ranks[i] - 1.0)/(double)(non_na -1); /* target_ind_double = 1.0/3.0 + ((double)(*targetrows) + 1.0/3.0) * samplepercentile; */ target_ind_double = 1.0 + ((double)(targetnon_na) - 1.0) * samplepercentile; target_ind_double_floor = floor(target_ind_double + 4*DOUBLE_EPS); target_ind_double = target_ind_double - target_ind_double_floor; if (fabs(target_ind_double) <= 4*DOUBLE_EPS){ target_ind_double = 0.0; } if (target_ind_double == 0.0){ target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); 
*/ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else if (target_ind_double == 1.0){ target_ind = (int)floor(target_ind_double_floor + 1.5); /* (int)nearbyint(target_ind_double_floor + 1.0); */ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else { target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; if ((target_ind < *targetrows) && (target_ind > 0)){ data[j*(*rows) +ind] = (1.0- target_ind_double)*row_mean[target_ind-1] + target_ind_double*row_mean[target_ind]; } else if (target_ind >= *targetrows){ data[j*(*rows) +ind] = row_mean[*targetrows-1]; } else { data[j*(*rows) +ind] = row_mean[0]; } } } } } Free(dimat[0]); Free(dimat); Free(ranks); } #ifdef USE_PTHREADS void *using_target_group(void *data){ struct loop_data *args = (struct loop_data *) data; using_target(args->data, args->rows, args->cols, args->row_mean, args->row_meanlength, args->start_col, args->end_col); } #endif /***************************************************************** ** ** int qnorm_c_using_target(double *data, int *rows, int *cols, double *target, int *targetrows) ** ** double *data - a matrix of data to be normalized ** int *rows - dimensions of data ** int *cols - dimensions of data ** double *target - vector containing target distribution (ie distribution to be ** normalized to) ** int *targetrows - length of target distribution vector ** ** ** if targetrows == rows then the standard methodology is used. 
** ** in other cases the appropriate quantiles to be normalized to are determined in a method ** equivalent to what you get using "type 8" with the quantile function ** ** Note sample percentiles are calculated using i/(n+1) (ie if there is ** only 2 observations, the first sample percentile is 1/3 = 0.333, ** the second sample percentile will be 2/3 = 0.6666 ** ** ** *****************************************************************/ int qnorm_c_using_target(double *data, int *rows, int *cols, double *target, int *targetrows){ int i; double *row_mean; int targetnon_na = 0; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif row_mean = (double *)Calloc(*targetrows,double); /* first find the normalizing distribution */ for (i =0; i < *targetrows; i++){ if (ISNA(target[i])){ } else { row_mean[targetnon_na] = target[i]; targetnon_na++; } } qsort(row_mean,targetnon_na,sizeof(double),(int(*)(const void*, const void*))sort_double); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of columns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of columns is less than the number of threads */ if 
(num_threads < *cols){ chunk_size = *cols/num_threads; chunk_size_d = ((double) *cols)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((*cols < num_threads ? *cols : num_threads), struct loop_data); args[0].data = data; args[0].row_mean = row_mean; args[0].rows = rows; args[0].cols = cols; args[0].row_meanlength = &targetnon_na; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < *cols; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_col = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_col = i+chunk_size; i++; } else{ args[t].end_col = i+chunk_size-1; } t++; } /* Determining the quantile normalization target distribution */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, using_target_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else using_target(data, rows, cols, row_mean, &targetnon_na, 0, *cols -1); #endif Free(row_mean); return 0; } void determine_target(double *data, double *row_mean, int *rows, int *cols, int start_col, int end_col){ int i,j,row_mean_ind; double *datvec; double row_mean_ind_double,row_mean_ind_double_floor; double samplepercentile; int non_na; #ifdef USE_PTHREADS long double 
*row_submean = (long double *)Calloc((*rows), long double); #endif datvec = (double *)Calloc(*rows,double); /* first find the normalizing distribution */ for (j = start_col; j <= end_col; j++){ non_na = 0; for (i =0; i < *rows; i++){ if (ISNA(data[j*(*rows) + i])){ } else { datvec[non_na] = data[j*(*rows) + i]; non_na++; } } if (non_na == *rows){ /* no NA values */ qsort(datvec,*rows,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i =0; i < *rows; i++){ #ifdef USE_PTHREADS row_submean[i] += datvec[i]; #else row_mean[i] += datvec[i]/((double)*cols); #endif } } else { /* Use the observed data (non NA) values to estimate the distribution */ /* Note that some of the variable names here might be a little confusing. Probably because I copied the code from below */ qsort(datvec,non_na,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i =0; i < *rows; i++){ samplepercentile = (double)(i)/(double)(*rows-1); /* Rprintf("%f\n",samplepercentile); */ /* row_mean_ind_double = 1.0/3.0 + ((double)(*rows) + 1.0/3.0) * samplepercentile; */ row_mean_ind_double = 1.0 + ((double)(non_na) -1.0) * samplepercentile; row_mean_ind_double_floor = floor(row_mean_ind_double + 4*DOUBLE_EPS); row_mean_ind_double = row_mean_ind_double - row_mean_ind_double_floor; if (fabs(row_mean_ind_double) <= 4*DOUBLE_EPS){ row_mean_ind_double = 0.0; } if (row_mean_ind_double == 0.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* (int)nearbyint(row_mean_ind_double_floor); */ #ifdef USE_PTHREADS row_submean[i]+= datvec[row_mean_ind-1]; #else row_mean[i]+= datvec[row_mean_ind-1]/((double)*cols); #endif } else if (row_mean_ind_double == 1.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 1.5); /* (int)nearbyint(row_mean_ind_double_floor + 1.0); */ #ifdef USE_PTHREADS row_submean[i]+= datvec[row_mean_ind-1]; #else row_mean[i]+= datvec[row_mean_ind-1]/((double)*cols); #endif } else { row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* 
(int)nearbyint(row_mean_ind_double_floor); */ if ((row_mean_ind < *rows) && (row_mean_ind > 0)){ #ifdef USE_PTHREADS row_submean[i]+= ((1.0- row_mean_ind_double)*datvec[row_mean_ind-1] + row_mean_ind_double*datvec[row_mean_ind]); #else row_mean[i]+= ((1.0- row_mean_ind_double)*datvec[row_mean_ind-1] + row_mean_ind_double*datvec[row_mean_ind])/((double)*cols); #endif } else if (row_mean_ind >= *rows){ #ifdef USE_PTHREADS row_submean[i]+= datvec[non_na-1]; #else row_mean[i]+= datvec[non_na-1]/((double)*cols); #endif } else { #ifdef USE_PTHREADS row_submean[i]+= datvec[0]; #else row_mean[i]+= datvec[0]/((double)*cols); #endif } } } } } #ifdef USE_PTHREADS /* add to the global running total, will do the division after all threads finish (for precision of the result) */ pthread_mutex_lock (&mutex_R); for (i = 0; i < *rows; i++){ row_mean[i] += (double) row_submean[i]; } pthread_mutex_unlock (&mutex_R); #endif Free(datvec); } #ifdef USE_PTHREADS void *determine_target_group(void *data){ struct loop_data *args = (struct loop_data *) data; determine_target(args->data, args->row_mean, args->rows, args->cols, args->start_col, args->end_col); } #endif int qnorm_c_determine_target(double *data, int *rows, int *cols, double *target, int *targetrows){ int i,j,row_mean_ind; double *row_mean = (double *)Calloc((*rows),double); double row_mean_ind_double,row_mean_ind_double_floor; double samplepercentile; int non_na; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif #if defined(USE_PTHREADS) nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", 
THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of columns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of columns is less than the number of threads */ if (num_threads < *cols){ chunk_size = *cols/num_threads; chunk_size_d = ((double) *cols)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((*cols < num_threads ? *cols : num_threads), struct loop_data); args[0].data = data; args[0].row_mean = row_mean; args[0].rows = rows; args[0].cols = cols; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < *cols; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_col = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_col = i+chunk_size; i++; } else{ args[t].end_col = i+chunk_size-1; } t++; } /* Determining the quantile normalization target distribution */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, determine_target_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, 
returnCode, *((int *) status)); } } /* When in threaded mode, row_mean is the sum, waiting for a final division here, to maintain precision */ for (i = 0; i < *rows; i++){ row_mean[i] /= (double)*cols; } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else normalize_determine_target(data,row_mean,rows,cols,0,*cols-1); #endif if (*rows == *targetrows){ for (i =0; i < *rows; i++){ target[i] = row_mean[i]; } } else { /* need to estimate quantiles */ for (i =0; i < *targetrows; i++){ samplepercentile = (double)(i)/(double)(*targetrows -1); /* row_mean_ind_double = 1.0/3.0 + ((double)(*rows) + 1.0/3.0) * samplepercentile; */ row_mean_ind_double = 1.0 + ((double)(*rows) -1.0) * samplepercentile; row_mean_ind_double_floor = floor(row_mean_ind_double + 4*DOUBLE_EPS); row_mean_ind_double = row_mean_ind_double - row_mean_ind_double_floor; if (fabs(row_mean_ind_double) <= 4*DOUBLE_EPS){ row_mean_ind_double = 0.0; } if (row_mean_ind_double == 0.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* (int)nearbyint(row_mean_ind_double_floor); */ target[i] = row_mean[row_mean_ind-1]; } else if (row_mean_ind_double == 1.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 1.5); /* (int)nearbyint(row_mean_ind_double_floor + 1.0); */ target[i] = row_mean[row_mean_ind-1]; } else { row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* (int)nearbyint(row_mean_ind_double_floor); */ if ((row_mean_ind < *rows) && (row_mean_ind > 0)){ target[i] = (1.0- row_mean_ind_double)*row_mean[row_mean_ind-1] + row_mean_ind_double*row_mean[row_mean_ind]; } else if (row_mean_ind >= *rows){ target[i] = row_mean[*rows-1]; } else { target[i] = row_mean[0]; } } } } Free(row_mean); return 0; } SEXP R_qnorm_using_target(SEXP X, SEXP target,SEXP copy){ SEXP Xcopy,dim1; /*,dim2; */ int rows, cols; int target_rows, target_cols; double *Xptr; double *targetptr; PROTECT(dim1 = getAttrib(X,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = 
INTEGER(dim1)[1]; UNPROTECT(1); if (asInteger(copy)){ PROTECT(Xcopy = allocMatrix(REALSXP,rows,cols)); copyMatrix(Xcopy,X,0); } else { Xcopy = X; } Xptr = NUMERIC_POINTER(AS_NUMERIC(Xcopy)); if (isVector(target)){ target_rows = length(target); } else if (isMatrix(target)){ PROTECT(dim1 = getAttrib(X,R_DimSymbol)); target_rows = INTEGER(dim1)[0]; target_cols = INTEGER(dim1)[1]; UNPROTECT(1); target_rows = target_rows*target_cols; } targetptr = NUMERIC_POINTER(AS_NUMERIC(target)); qnorm_c_using_target(Xptr, &rows, &cols,targetptr,&target_rows); if (asInteger(copy)){ UNPROTECT(1); } return Xcopy; } SEXP R_qnorm_determine_target(SEXP X, SEXP targetlength){ SEXP dim1,target; int rows, cols; int length; double *Xptr; double *targetptr; PROTECT(dim1 = getAttrib(X,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); length = asInteger(targetlength); /* Rprintf("%d\n",length);*/ PROTECT(target=allocVector(REALSXP,length)); Xptr = NUMERIC_POINTER(AS_NUMERIC(X)); targetptr = NUMERIC_POINTER(target); qnorm_c_determine_target(Xptr,&rows,&cols,targetptr, &length); UNPROTECT(1); return target; } /********************************************************* ** ** void qnorm_c_handleNA(double *data, int *rows, int *cols) ** ** this is the function that actually implements the ** quantile normalization algorithm. It is called from R. 
** ** returns 1 if there is a problem, 0 otherwise ** ** Note that this function does not handle missing data (ie NA) ** ********************************************************/ void qnorm_c_handleNA(double *data, int *rows, int *cols){ double *target = Calloc(*rows,double); qnorm_c_determine_target(data, rows, cols, target, rows); qnorm_c_using_target(data, rows, cols, target, rows); Free(target); } /********************************************************* ** ** SEXP R_qnorm_c_handleNA(SEXP X) ** ** SEXP X - a matrix ** SEXP copy - a flag if TRUE then make copy ** before normalizing, if FALSE work in place ** note that this can be dangerous since ** it will change the original matrix. ** ** returns a quantile normalized matrix. ** ** This is a .Call() interface for quantile normalization ** *********************************************************/ SEXP R_qnorm_c_handleNA(SEXP X, SEXP copy){ SEXP Xcopy,dim1; double *Xptr; int rows,cols; PROTECT(dim1 = getAttrib(X,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; if (asInteger(copy)){ PROTECT(Xcopy = allocMatrix(REALSXP,rows,cols)); copyMatrix(Xcopy,X,0); } else { Xcopy = X; } Xptr = NUMERIC_POINTER(AS_NUMERIC(Xcopy)); qnorm_c_handleNA(Xptr, &rows, &cols); if (asInteger(copy)){ UNPROTECT(2); } else { UNPROTECT(1); } return Xcopy; } /***************************************************************************************************** ***************************************************************************************************** ** ** The following block of code implements quantile normalization within blocks. 
** What this means is that the normalization is still carried out across arrays (or columns)
** but separate subsets of rows (these are blocks) each get their own normalization
**
*****************************************************************************************************
*****************************************************************************************************/

/*****************************************************************
**
** int qnorm_c_within_blocks(double *x, int *rows, int *cols, int *blocks)
**
** double *x - matrix to be normalized (rows x cols, column major),
**             modified in place
** int *rows - dimensions of the matrix
** int *cols -
** int *blocks - labeling telling which block each row belongs to.
**
** Quantile normalizes each block of rows separately across columns.
** Always returns 0.
**
*****************************************************************/

int qnorm_c_within_blocks(double *x, int *rows, int *cols, int *blocks){

  int i,j,ind;
  dataitem_block **dimat_block;
  /* double sum; */
  double *row_mean = (double *)Calloc((*rows),double);
  double *ranks = (double *)Calloc((*rows),double);

  dimat_block = (dataitem_block **)Calloc(1,dataitem_block *);
  dimat_block[0] = (dataitem_block *)Calloc(*rows,dataitem_block);

  for (i =0; i < *rows; i++){
    row_mean[i] = 0.0;
  }

  /* first find the normalizing distribution: sorting with sort_fn_blocks
     groups rows by block, so row_mean accumulates the per-block mean of
     the sorted values (averaged across columns) */
  for (j = 0; j < *cols; j++){
    for (i =0; i < *rows; i++){
      dimat_block[0][i].data = x[j*(*rows) + i];
      dimat_block[0][i].block = blocks[i];
    }
    qsort(dimat_block[0],*rows,sizeof(dataitem_block),sort_fn_blocks);
    /*    for (i=0; i < *rows; i++){
      Rprintf("%f %d\n",dimat_block[0][i].data,dimat_block[0][i].block);
      } */
    for (i =0; i < *rows; i++){
      row_mean[i] += dimat_block[0][i].data/((double)*cols);
    }
  }

  /* now assign back distribution: re-sort each column within blocks,
     compute ranks, and replace each value with the corresponding
     row_mean entry; fractional ranks (ties) average two neighbours */
  for (j = 0; j < *cols; j++){
    for (i =0; i < *rows; i++){
      dimat_block[0][i].data = x[j*(*rows) + i];
      dimat_block[0][i].block = blocks[i];
      dimat_block[0][i].rank = i;  /* remember the original row */
    }
    qsort(dimat_block[0],*rows,sizeof(dataitem_block),sort_fn_blocks);
    get_ranks_blocks(ranks,dimat_block[0],*rows);
    for (i =0; i < *rows; i++){
      ind = dimat_block[0][i].rank;
      /* threshold 0.4 catches half-ranks produced by ties */
      if (ranks[i] - floor(ranks[i]) > 0.4){
	x[j*(*rows) +ind] = 0.5*(row_mean[(int)floor(ranks[i])-1] + row_mean[(int)floor(ranks[i])]);
      } else {
	x[j*(*rows) +ind] = row_mean[(int)floor(ranks[i])-1];
      }
    }
  }

  Free(ranks);
  Free(dimat_block[0]);
  Free(dimat_block);
  Free(row_mean);
  return 0;
}

/*****************************************************************
**
** SEXP R_qnorm_within_blocks(SEXP X, SEXP blocks, SEXP copy)
**
** SEXP X      - a matrix
** SEXP blocks - per-row block labels (coerced to integer)
** SEXP copy   - if TRUE normalize a copy, else modify X in place
**
** .Call() interface for within-block quantile normalization.
** Returns the normalized matrix.
**
*****************************************************************/

SEXP R_qnorm_within_blocks(SEXP X,SEXP blocks,SEXP copy){

  SEXP Xcopy,dim1,blocksint;
  double *Xptr;
  int *blocksptr;
  int rows,cols;

  PROTECT(dim1 = getAttrib(X,R_DimSymbol));
  rows = INTEGER(dim1)[0];
  cols = INTEGER(dim1)[1];
  UNPROTECT(1);

  if (asInteger(copy)){
    PROTECT(Xcopy = allocMatrix(REALSXP,rows,cols));
    copyMatrix(Xcopy,X,0);
  } else {
    Xcopy = X;
  }

  PROTECT(blocksint = coerceVector(blocks,INTSXP));

  Xptr = NUMERIC_POINTER(AS_NUMERIC(Xcopy));
  blocksptr = INTEGER_POINTER(blocksint);

  qnorm_c_within_blocks(Xptr, &rows, &cols,blocksptr);

  /* 2 PROTECTs outstanding when a copy was made, 1 otherwise */
  if (asInteger(copy)){
    UNPROTECT(2);
  } else {
    UNPROTECT(1);
  }
  return Xcopy;
}

/*****************************************************************************************************
*****************************************************************************************************
**
** The following block of code provides quantile normalization where a specified target vector is given.
** In addition it deals with cases of unequal length by estimating the appropriate quantiles.
** However, unlike the above code it allows a specific subset of probes to be used for determining
** the target distribution. Then applies it so that the subset gets given the target distribution
** and markers not used for forming the target are adjusted relative to it.
** ***************************************************************************************************** *****************************************************************************************************/ void determine_target_via_subset(double *data, double *row_mean, int *rows, int *cols, int *in_subset, int start_col, int end_col){ int i,j,row_mean_ind; double *datvec; double row_mean_ind_double,row_mean_ind_double_floor; double samplepercentile; int non_na; #ifdef USE_PTHREADS long double *row_submean = (long double *)Calloc((*rows), long double); #endif datvec = (double *)Calloc(*rows,double); /* first find the normalizing distribution */ for (j = start_col; j <= end_col; j++){ non_na = 0; for (i =0; i < *rows; i++){ if (ISNA(data[j*(*rows) + i]) || in_subset[i] == 0){ } else { datvec[non_na] = data[j*(*rows) + i]; non_na++; } } if (non_na == *rows){ /* no NA values */ qsort(datvec,*rows,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i =0; i < *rows; i++){ #ifdef USE_PTHREADS row_submean[i] += datvec[i]; #else row_mean[i] += datvec[i]/((double)*cols); #endif } } else { /* Use the observed data (non NA) values to estimate the distribution */ /* Note that some of the variable names here might be a little confusing. 
Probably because I copied the code from below */ qsort(datvec,non_na,sizeof(double),(int(*)(const void*, const void*))sort_double); for (i =0; i < *rows; i++){ samplepercentile = (double)(i)/(double)(*rows-1); /* Rprintf("%f\n",samplepercentile); */ /* row_mean_ind_double = 1.0/3.0 + ((double)(*rows) + 1.0/3.0) * samplepercentile; */ row_mean_ind_double = 1.0 + ((double)(non_na) -1.0) * samplepercentile; row_mean_ind_double_floor = floor(row_mean_ind_double + 4*DOUBLE_EPS); row_mean_ind_double = row_mean_ind_double - row_mean_ind_double_floor; if (fabs(row_mean_ind_double) <= 4*DOUBLE_EPS){ row_mean_ind_double = 0.0; } if (row_mean_ind_double == 0.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* (int)nearbyint(row_mean_ind_double_floor); */ #ifdef USE_PTHREADS row_submean[i]+= datvec[row_mean_ind-1]; #else row_mean[i]+= datvec[row_mean_ind-1]/((double)*cols); #endif } else if (row_mean_ind_double == 1.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 1.5); /* (int)nearbyint(row_mean_ind_double_floor + 1.0); */ #ifdef USE_PTHREADS row_submean[i]+= datvec[row_mean_ind-1]; #else row_mean[i]+= datvec[row_mean_ind-1]/((double)*cols); #endif } else { row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* (int)nearbyint(row_mean_ind_double_floor); */ if ((row_mean_ind < *rows) && (row_mean_ind > 0)){ #ifdef USE_PTHREADS row_submean[i]+= ((1.0- row_mean_ind_double)*datvec[row_mean_ind-1] + row_mean_ind_double*datvec[row_mean_ind]); #else row_mean[i]+= ((1.0- row_mean_ind_double)*datvec[row_mean_ind-1] + row_mean_ind_double*datvec[row_mean_ind])/((double)*cols); #endif } else if (row_mean_ind >= *rows){ #ifdef USE_PTHREADS row_submean[i]+= datvec[non_na-1]; #else row_mean[i]+= datvec[non_na-1]/((double)*cols); #endif } else { #ifdef USE_PTHREADS row_submean[i]+= datvec[0]; #else row_mean[i]+= datvec[0]/((double)*cols); #endif } } } } } #ifdef USE_PTHREADS /* add to the global running total, will do the division after all threads finish (for 
precision of the result) */ pthread_mutex_lock (&mutex_R); for (i = 0; i < *rows; i++){ row_mean[i] += (double) row_submean[i]; } pthread_mutex_unlock (&mutex_R); #endif Free(datvec); } #ifdef USE_PTHREADS void *determine_target_group_via_subset(void *data){ struct loop_data *args = (struct loop_data *) data; determine_target_via_subset(args->data, args->row_mean, args->rows, args->cols, args->in_subset, args->start_col, args->end_col); } #endif int qnorm_c_determine_target_via_subset(double *data, int *rows, int *cols, int *in_subset, double *target, int *targetrows){ int i,j,row_mean_ind; double *row_mean = (double *)Calloc((*rows),double); double row_mean_ind_double,row_mean_ind_double_floor; double samplepercentile; int non_na; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif #if defined(USE_PTHREADS) nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of columns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of columns is less than the number of threads */ if (num_threads < *cols){ chunk_size = *cols/num_threads; chunk_size_d = ((double) *cols)/((double) num_threads); } else { chunk_size = 1; chunk_size_d 
= 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((*cols < num_threads ? *cols : num_threads), struct loop_data); args[0].data = data; args[0].row_mean = row_mean; args[0].rows = rows; args[0].cols = cols; args[0].in_subset = in_subset; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < *cols; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_col = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_col = i+chunk_size; i++; } else{ args[t].end_col = i+chunk_size-1; } t++; } /* Determining the quantile normalization target distribution */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, determine_target_group_via_subset, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } /* When in threaded mode, row_mean is the sum, waiting for a final division here, to maintain precision */ for (i = 0; i < *rows; i++){ row_mean[i] /= (double)*cols; } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else determine_target_via_subset(data, row_mean, rows, cols, in_subset, 0,*cols-1); #endif if (*rows == *targetrows){ for (i =0; i < *rows; i++){ target[i] = row_mean[i]; } } else { /* need to estimate quantiles */ for (i =0; i < *targetrows; i++){ samplepercentile = (double)(i)/(double)(*targetrows -1); /* row_mean_ind_double = 1.0/3.0 + ((double)(*rows) 
+ 1.0/3.0) * samplepercentile; */ row_mean_ind_double = 1.0 + ((double)(*rows) -1.0) * samplepercentile; row_mean_ind_double_floor = floor(row_mean_ind_double + 4*DOUBLE_EPS); row_mean_ind_double = row_mean_ind_double - row_mean_ind_double_floor; if (fabs(row_mean_ind_double) <= 4*DOUBLE_EPS){ row_mean_ind_double = 0.0; } if (row_mean_ind_double == 0.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* (int)nearbyint(row_mean_ind_double_floor); */ target[i] = row_mean[row_mean_ind-1]; } else if (row_mean_ind_double == 1.0){ row_mean_ind = (int)floor(row_mean_ind_double_floor + 1.5); /* (int)nearbyint(row_mean_ind_double_floor + 1.0); */ target[i] = row_mean[row_mean_ind-1]; } else { row_mean_ind = (int)floor(row_mean_ind_double_floor + 0.5); /* (int)nearbyint(row_mean_ind_double_floor); */ if ((row_mean_ind < *rows) && (row_mean_ind > 0)){ target[i] = (1.0- row_mean_ind_double)*row_mean[row_mean_ind-1] + row_mean_ind_double*row_mean[row_mean_ind]; } else if (row_mean_ind >= *rows){ target[i] = row_mean[*rows-1]; } else { target[i] = row_mean[0]; } } } } Free(row_mean); return 0; } /****************************************************************** ** ** double linear_interpolate_helper(double v, double *x, double *y, int n) ** ** double v ** double *x ** double *y ** int n ** ** linearly interpolate v given x and y. 
** ** **********************************************************************/ static double linear_interpolate_helper(double v, double *x, double *y, int n) { int i, j, ij; i = 0; j = n - 1; if(v < x[i]) return y[0]; if(v > x[j]) return y[n-1]; /* find the correct interval by bisection */ while(i < j - 1) { /* x[i] <= v <= x[j] */ ij = (i + j)/2; /* i+1 <= ij <= j-1 */ if(v < x[ij]) j = ij; else i = ij; /* still i < j */ } /* provably have i == j-1 */ /* interpolation */ if(v == x[j]) return y[j]; if(v == x[i]) return y[i]; /* impossible: if(x[j] == x[i]) return y[i]; */ return y[i] + (y[j] - y[i]) * ((v - x[i])/(x[j] - x[i])); } static void using_target_via_subset_part1(double *data, int *rows, int *cols, int *in_subset, double *target, int *targetrows, int start_col, int end_col, int subset_count){ int i,j,ind,target_ind; dataitem **dimat; double *row_mean = target; double *ranks = (double *)Calloc((*rows),double); double samplepercentile; double target_ind_double,target_ind_double_floor; int targetnon_na = *targetrows; int non_na = 0; double *sample_percentiles; double *datvec; sample_percentiles = (double *)Calloc(subset_count, double); datvec = (double *)Calloc(*rows,double); dimat = (dataitem **)Calloc(1,dataitem *); dimat[0] = (dataitem *)Calloc(*rows,dataitem); for (j = start_col; j <= end_col; j++){ /* First figure out percentiles of the "subset" data */ non_na = 0; for (i =0; i < *rows; i++){ if (!ISNA(data[j*(*rows) + i]) && (in_subset[i] == 1)){ dimat[0][non_na].data = data[j*(*rows) + i]; dimat[0][non_na].rank = i; non_na++; } } qsort(dimat[0],non_na,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],non_na); for (i=0; i < non_na; i++){ sample_percentiles[i] = (double)(ranks[i] - 1)/(double)(non_na-1); datvec[i] = dimat[0][i].data; } /* Now try to estimate what percentile of the "subset" data each datapoint in the "non-subset" data falls */ for (i =0; i < *rows; i++){ /*Linear interpolate to get sample percentile */ if (in_subset[i] == 0 && 
!ISNA(data[j*(*rows) + i])){ samplepercentile = linear_interpolate_helper(data[j*(*rows) + i], datvec, sample_percentiles, non_na); target_ind_double = 1.0 + ((double)(targetnon_na) - 1.0) * samplepercentile; target_ind_double_floor = floor(target_ind_double + 4*DOUBLE_EPS); target_ind_double = target_ind_double - target_ind_double_floor; if (fabs(target_ind_double) <= 4*DOUBLE_EPS){ target_ind_double = 0.0; } if (target_ind_double == 0.0){ target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; data[j*(*rows) +i] = row_mean[target_ind-1]; } else if (target_ind_double == 1.0){ target_ind = (int)floor(target_ind_double_floor + 1.5); /* (int)nearbyint(target_ind_double_floor + 1.0); */ ind = dimat[0][i].rank; data[j*(*rows) +i] = row_mean[target_ind-1]; } else { target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; if ((target_ind < *targetrows) && (target_ind > 0)){ data[j*(*rows) +i] = (1.0- target_ind_double)*row_mean[target_ind-1] + target_ind_double*row_mean[target_ind]; } else if (target_ind >= *targetrows){ data[j*(*rows) +i] = row_mean[*targetrows-1]; } else { data[j*(*rows) +i] = row_mean[0]; } } } } } Free(dimat[0]); Free(dimat); Free(datvec); Free(sample_percentiles); } static void using_target_via_subset_part2(double *data, int *rows, int *cols, int *in_subset, double *target, int *targetrows, int start_col, int end_col, int subset_count){ int i,j,ind,target_ind; dataitem **dimat; double *row_mean = target; double *ranks = (double *)Calloc((*rows),double); double samplepercentile; double target_ind_double,target_ind_double_floor; int targetnon_na = *targetrows; int non_na = 0; double *sample_percentiles; double *datvec; if (*rows == targetnon_na){ /* now assign back distribution */ /* this is basically the standard story */ dimat = (dataitem **)Calloc(1,dataitem *); dimat[0] = (dataitem *)Calloc(*rows,dataitem); for (j = 
start_col; j <= end_col; j++){ non_na = 0; for (i =0; i < *rows; i++){ if (!ISNA(data[j*(*rows) + i]) && (in_subset[i] == 1)){ dimat[0][non_na].data = data[j*(*rows) + i]; dimat[0][non_na].rank = i; non_na++; } } if (non_na == *rows){ qsort(dimat[0],*rows,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],*rows); for (i =0; i < *rows; i++){ ind = dimat[0][i].rank; if (ranks[i] - floor(ranks[i]) > 0.4){ data[j*(*rows) +ind] = 0.5*(row_mean[(int)floor(ranks[i])-1] + row_mean[(int)floor(ranks[i])]); } else { data[j*(*rows) +ind] = row_mean[(int)floor(ranks[i])-1]; } } } else { /* we are going to have to estimate the quantiles */ qsort(dimat[0],non_na,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],non_na); for (i =0; i < non_na; i++){ samplepercentile = (double)(ranks[i] - 1)/(double)(non_na-1); /* target_ind_double = 1.0/3.0 + ((double)(*targetrows) + 1.0/3.0) * samplepercentile; */ target_ind_double = 1.0 + ((double)(targetnon_na) - 1.0) * samplepercentile; target_ind_double_floor = floor(target_ind_double + 4*DOUBLE_EPS); target_ind_double = target_ind_double - target_ind_double_floor; if (fabs(target_ind_double) <= 4*DOUBLE_EPS){ target_ind_double = 0.0; } if (target_ind_double == 0.0){ target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else if (target_ind_double == 1.0){ target_ind = (int)floor(target_ind_double_floor + 1.5); /* (int)nearbyint(target_ind_double_floor + 1.0); */ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else { target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; if ((target_ind < *targetrows) && (target_ind > 0)){ data[j*(*rows) +ind] = (1.0- target_ind_double)*row_mean[target_ind-1] + target_ind_double*row_mean[target_ind]; } else if (target_ind >= *targetrows){ data[j*(*rows) +ind] = row_mean[*targetrows-1]; } else { 
data[j*(*rows) +ind] = row_mean[0]; } } } } } } else { /** the length of the target distribution and the size of the data matrix differ **/ /** need to estimate quantiles **/ dimat = (dataitem **)Calloc(1,dataitem *); dimat[0] = (dataitem *)Calloc(*rows,dataitem); for (j = start_col; j <= end_col; j++){ non_na = 0; for (i =0; i < *rows; i++){ if (!ISNA(data[j*(*rows) + i]) && (in_subset[i] == 1)){ dimat[0][non_na].data = data[j*(*rows) + i]; dimat[0][non_na].rank = i; non_na++; } } qsort(dimat[0],non_na,sizeof(dataitem),sort_fn); get_ranks(ranks,dimat[0],non_na); for (i =0; i < non_na; i++){ samplepercentile = (double)(ranks[i] - 1.0)/(double)(non_na -1); /* target_ind_double = 1.0/3.0 + ((double)(*targetrows) + 1.0/3.0) * samplepercentile; */ target_ind_double = 1.0 + ((double)(targetnon_na) - 1.0) * samplepercentile; target_ind_double_floor = floor(target_ind_double + 4*DOUBLE_EPS); target_ind_double = target_ind_double - target_ind_double_floor; if (fabs(target_ind_double) <= 4*DOUBLE_EPS){ target_ind_double = 0.0; } if (target_ind_double == 0.0){ target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else if (target_ind_double == 1.0){ target_ind = (int)floor(target_ind_double_floor + 1.5); /* (int)nearbyint(target_ind_double_floor + 1.0); */ ind = dimat[0][i].rank; data[j*(*rows) +ind] = row_mean[target_ind-1]; } else { target_ind = (int)floor(target_ind_double_floor + 0.5); /* nearbyint(target_ind_double_floor); */ ind = dimat[0][i].rank; if ((target_ind < *targetrows) && (target_ind > 0)){ data[j*(*rows) +ind] = (1.0- target_ind_double)*row_mean[target_ind-1] + target_ind_double*row_mean[target_ind]; } else if (target_ind >= *targetrows){ data[j*(*rows) +ind] = row_mean[*targetrows-1]; } else { data[j*(*rows) +ind] = row_mean[0]; } } } } } Free(dimat[0]); Free(dimat); Free(ranks); } void using_target_via_subset(double *data, int *rows, int 
*cols, int *in_subset, double *target, int *targetrows, int start_col, int end_col){ int i,j,ind,target_ind; dataitem **dimat; double *row_mean = target; double *ranks = (double *)Calloc((*rows),double); double samplepercentile; double target_ind_double,target_ind_double_floor; int targetnon_na = *targetrows; int non_na = 0; int subset_count = 0; double *sample_percentiles; double *datvec; /* Two parts to the algorithm */ /* First find out if the enitirety of the data is in the subset */ for (i = 0; i < *rows; i++){ if (in_subset[i] == 1){ subset_count++; } } /* Part 1: Adjust the elements not in the "subset" */ if (*rows > subset_count){ /* We have non subset elements to deal with */ using_target_via_subset_part1(data, rows, cols, in_subset, target, targetrows, start_col, end_col,subset_count); } /* Part 2: Adjust the elements in the "subset"*/ using_target_via_subset_part2(data, rows, cols, in_subset, target, targetrows, start_col, end_col,subset_count); } #ifdef USE_PTHREADS void *using_target_group_via_subset(void *data){ struct loop_data *args = (struct loop_data *) data; using_target_via_subset(args->data, args->rows, args->cols, args->in_subset, args->row_mean, args->row_meanlength, args->start_col, args->end_col); } #endif int qnorm_c_using_target_via_subset(double *data, int *rows, int *cols, int *in_subset, double *target, int *targetrows){ int i; double *row_mean; int targetnon_na = 0; #ifdef USE_PTHREADS int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif row_mean = (double *)Calloc(*targetrows,double); /* first find the normalizing distribution */ for (i =0; i < *targetrows; i++){ if (ISNA(target[i])){ } else { row_mean[targetnon_na] = target[i]; targetnon_na++; } } 
qsort(row_mean,targetnon_na,sizeof(double),(int(*)(const void*, const void*))sort_double); #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of columns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of columns is less than the number of threads */ if (num_threads < *cols){ chunk_size = *cols/num_threads; chunk_size_d = ((double) *cols)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((*cols < num_threads ? 
*cols : num_threads), struct loop_data); args[0].data = data; args[0].row_mean = row_mean; args[0].rows = rows; args[0].cols = cols; args[0].row_meanlength = &targetnon_na; args[0].in_subset = in_subset; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < *cols; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_col = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_col = i+chunk_size; i++; } else{ args[t].end_col = i+chunk_size-1; } t++; } /* Determining the quantile normalization target distribution */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, using_target_group_via_subset, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else using_target_via_subset(data, rows, cols, in_subset, row_mean, &targetnon_na, 0, *cols -1); #endif Free(row_mean); return 0; } SEXP R_qnorm_determine_target_via_subset(SEXP X, SEXP subset, SEXP targetlength){ SEXP dim1,target; int rows, cols; int length; double *Xptr; double *targetptr; int *subsetptr; PROTECT(dim1 = getAttrib(X,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); length = asInteger(targetlength); /* Rprintf("%d\n",length);*/ PROTECT(target=allocVector(REALSXP,length)); Xptr = NUMERIC_POINTER(AS_NUMERIC(X)); targetptr = 
NUMERIC_POINTER(target); subsetptr = INTEGER_POINTER(subset); qnorm_c_determine_target_via_subset(Xptr,&rows,&cols,subsetptr,targetptr, &length); UNPROTECT(1); return target; } SEXP R_qnorm_using_target_via_subset(SEXP X, SEXP subset, SEXP target,SEXP copy){ SEXP Xcopy,dim1; /*,dim2; */ int rows, cols; int target_rows, target_cols; double *Xptr; double *targetptr; int *subsetptr; PROTECT(dim1 = getAttrib(X,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; UNPROTECT(1); if (asInteger(copy)){ PROTECT(Xcopy = allocMatrix(REALSXP,rows,cols)); copyMatrix(Xcopy,X,0); } else { Xcopy = X; } Xptr = NUMERIC_POINTER(AS_NUMERIC(Xcopy)); if (isVector(target)){ target_rows = length(target); } else if (isMatrix(target)){ PROTECT(dim1 = getAttrib(X,R_DimSymbol)); target_rows = INTEGER(dim1)[0]; target_cols = INTEGER(dim1)[1]; UNPROTECT(1); target_rows = target_rows*target_cols; } targetptr = NUMERIC_POINTER(AS_NUMERIC(target)); subsetptr = INTEGER_POINTER(subset); qnorm_c_using_target_via_subset(Xptr, &rows, &cols,subsetptr,targetptr,&target_rows); if (asInteger(copy)){ UNPROTECT(1); } return Xcopy; } preprocessCore/src/qnorm.h0000644000126300012640000000171412127220006017222 0ustar00biocbuildphs_compbio#ifndef QNORM_H #define QNORM_H 1 #include #include #include #include int qnorm_c(double *data, int *rows, int *cols); int qnorm_robust_c(double *data,double *weights, int *rows, int *cols, int *use_median, int *use_log2, int *weight_scheme); int qnorm_c_using_target(double *data, int *rows, int *cols, double *target, int *targetrows); int qnorm_c_determine_target(double *data, int *rows, int *cols, double *target, int *targetrows); int qnorm_c_within_blocks(double *x, int *rows, int *cols, int *blocks); SEXP R_qnorm_c(SEXP X, SEXP copy); SEXP R_qnorm_robust_weights(SEXP X, SEXP remove_extreme, SEXP n_remove); SEXP R_qnorm_robust_c(SEXP X, SEXP copy, SEXP R_weights, SEXP R_use_median, SEXP R_use_log2, SEXP R_weight_scheme); SEXP R_qnorm_determine_target(SEXP X, SEXP 
targetlength); SEXP R_qnorm_using_target(SEXP X, SEXP target,SEXP copy); SEXP R_qnorm_within_blocks(SEXP X,SEXP blocks,SEXP copy); #endif preprocessCore/src/rlm.c0000644000126300012640000002761312127220006016661 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: rlm.c ** ** Aim: implement robust linear models. ** ** Copyright (C) 2003 Ben Bolstad ** ** created by: B. M. Bolstad ** ** created on: Jan 11, 2003 ** ** Last modified: Feb 11, 2003 ** ** The aim will be to provide a function that allows us to fit various ** robust linear models to affy data. Initially we will focus ** on huber regression. Code is inspired by rlm() method which is ** part of the MASS package bundle. ** ** ** History ** ** Jan 11, 2003 - Initial version ** Jan 12, 2003 - Continued implementing method. ** Jan 13, 2003 - Continued implementing. lm_wfit tested. ** rlm_fit tested. Integrate into AffyExtensions. ** Jan 29, 2003 - Be sure to check the fit to see if full rank. ** Jan 31, 2003 - Clean up/Expand Code comments ** Feb 10, 2003 - modify psi_huber so that weight, derivative and psi itself may be returned. ** this will fix a bug in se routines ** Feb 11, 2003 - clean up rlm_fit, so that there is no longer unnecesary ** memory allocation and copying of parameter/weights/se estimates. ** May 31, 2003 - Move psi_huber to psi_fns.c ** Jun 04, 2003 - Make rlm_fit accept a function pointer to allow ** more general psi functions ** Jun 05, 2003 - move lm_wfit to lm.c ** Sep 13, 2003 - rlm now has a parameter that controls the maximum number of iterations ** Apr 5, 2004 - all malloc/free are now Calloc/Free ** May 26, 2004 - rlm specialised for anova model. ** June 21, 2004 - fixed up specialization for default anova model. 
** June 23, 2004 - move specialization to its own file
** July 26, 2004 - rlm_wfit added
** Mar 1, 2006 - change all comments to ansi style
** May 27, 2007 - clean up code for inclusion in preprocessCore
**
********************************************************************/

/* NOTE(review): the targets of the bare #include directives below were
   stripped during extraction (angle-bracket names lost) — restore them
   against the upstream preprocessCore sources before building. */
#include
#include

#include "rma_common.h"
#include "rlm.h"
#include "psi_fns.h"
#include "lm.h"
#include "matrix_functions.h"

#include
#include
#include
#include

/***************************************************************
 **
 ** double irls_delta(double *old, double *new, int length)
 **
 ** double *old - previous value of a vector
 ** double *new - new value of a vector
 ** int length - length of vector
 **
 ** Computes sqrt( sum((old-new)^2) / max(sum(old^2), 1e-20) ), i.e. the
 ** relative change between the two vectors.  The 1e-20 floor on the
 ** denominator avoids division by zero when old is (near) all-zero.
 **
 ** Used to test for convergence in the iteratively reweighted least
 ** squares (IRLS) loop.
 **
 **************************************************************/

double irls_delta(double *old, double *new, int length){
  int i=0;
  double sum = 0.0;
  double sum2 =0.0;
  double divisor=1e-20;

  for (i=0; i < length; i++){
    sum = sum + (old[i] - new[i])*(old[i]-new[i]);
    sum2 = sum2 + old[i]*old[i];
  }

  if(sum2 >= divisor){
    divisor = sum2;
  }

  return sqrt(sum/divisor);
}

/****************************************************************
**
** This function is another method for computing convergence in the
** robust linear model fitting procedure. It is not currently used
** but is here in case it is required at a later date.
**
** irls.rrxwr <- function(x, w, r) {
**     w <- sqrt(w)
**     max(abs((matrix(r * w, 1, length(r)) %*% x)/sqrt(matrix(w,1, length(r))%*% (x^2))))/sqrt(sum(w * r^2))
** }

static double irls_rrxwr(double *x, double *w, double *r, int rows, int cols){
  int i =0,j=0;

  double *weights = Calloc(rows,double);
  double *rw = Calloc(rows,double);
  double *wr2 = Calloc(rows,double);
  double *numerator = Calloc(cols,double);
  double *denominator = Calloc(cols,double);
  double max_num,sum=0.0;

  for (i =0; i < rows; i++){
    weights[i] = sqrt(w[i]);
    rw[i] = weights[i]*r[i];
    wr2[i] = weights[i]*r[i]*r[i];
  }

  for (j=0; j < cols; j++){
    for (i=0; i < rows; i++){
      numerator[j] = numerator[j] + rw[i]*x[j*rows + i];
      denominator[j] = denominator[j] + w[i]*x[j*rows + i]*x[j*rows + i];
    }
  }

  for (j=0; j < cols; j++){
    numerator[j] = fabs(numerator[j]/sqrt(denominator[j]));
  }

  max_num = numerator[0];
  for (j=1; j < cols; j++){
    if (numerator[j] > max_num){
      max_num = numerator[j];
    }
  }

  sum = 0.0;
  for (i=0; i < rows; i++){
    sum+=wr2[i];
  }

  Free(numerator);
  Free(denominator);
  Free(wr2);
  Free(rw);
  Free(weights);

  return(max_num/sum);
}

**********************************************************************************/

/**********************************************************************************
 **
 ** double med_abs(double *x, int length)
 **
 ** double *x - a vector of data
 ** int length - length of the vector
 **
 ** Returns the median of the absolute values of x.  Works on a private
 ** copy, so x itself is not reordered by the underlying median() call.
 **
 ** Used by the IRLS loops below to form the MAD-based scale estimate.
 **
 **********************************************************************************/

double med_abs(double *x, int length){
  int i;
  double med_abs;
  double *buffer = Calloc(length,double);

  for (i = 0; i < length; i++)
    buffer[i] = fabs(x[i]);

  med_abs = median(buffer,length);

  Free(buffer);
  return(med_abs);
}

/**********************************************************************************
**
** void rlm_fit(double *x, double *y, int rows, int cols, double *out_beta, double *out_resids, double *out_weights)
**
** double *x - model matrix: dimension rows*cols
** double *y - independent variable: dimension cols
** int rows,cols - dimensions of matrix
** double *out_beta - already allocated space to store beta estimates: length cols
** double *out_resids - already allocated space to store residuals: length rows
** double *out_weights - already allocated space to store regression weights: length rows
**
** This function fits a robust linear model using M estimation, convergence is
** determined by the change in residuals.
**
**
**********************************************************************************/

/* Fit a robust linear model by IRLS M-estimation.
 *
 * PsiFn(u, psi_k, 0) must return the weight for a scaled residual u
 * (e.g. psi_huber).  If initialized is false an OLS fit seeds the loop;
 * otherwise out_beta/out_resids/out_weights are assumed to hold a prior
 * state.  Each iteration: scale = MAD(resids)/0.6745, reweight, refit via
 * lm_wfit, and stop when irls_delta(old, new) < 1e-4, when the scale
 * collapses below 1e-10, or after max_iter iterations.
 *
 * Results are written through out_beta (length cols), out_resids and
 * out_weights (length rows); wts/beta/resids below are just aliases so
 * the loop updates the caller's buffers in place.
 */
void rlm_fit(double *x, double *y, int rows, int cols, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

  int i;
  double tol = 1e-7;   /* tolerance passed through to lm_wfit */
  double acc = 1e-4;   /* convergence threshold on irls_delta */
  double scale =0.0;
  double conv;
  int iter;

  double *wts = out_weights;
  double *beta = out_beta;
  double *resids = out_resids;
  double *old_resids = Calloc(rows,double);

  if (!initialized){
    /* initially use equal weights */
    for (i=0; i < rows; i++){
      wts[i] = 1.0;
    }

    /* get our initial beta estimates by standard linear regression */
    lm_wfit(x, y, wts, rows, cols, tol, beta, resids);
  }

  for (iter = 0; iter < max_iter; iter++){

    /* MAD-based scale estimate; 0.6745 makes it consistent for the
       normal distribution */
    scale = med_abs(resids,rows)/0.6745;

    if (fabs(scale) < 1e-10){
      /* scale too small: residuals essentially zero, nothing to refit */
      break;
    }

    for (i =0; i < rows; i++){
      old_resids[i] = resids[i];
    }

    for (i=0; i < rows; i++){
      wts[i] = PsiFn(resids[i]/scale,psi_k,0);
    }

    lm_wfit(x, y, wts, rows, cols, tol, beta, resids);

    /* check convergence based on residuals */
    conv = irls_delta(old_resids,resids, rows);

    if (conv < acc){
      break;
    }
  }

  Free(old_resids);
}

/**********************************************************************************
**
** void rlm_fit_R(double *x, double *y, int *rows, int *cols, double *out_beta, double *out_resids, double *out_weights)
**
** double *x - model matrix: dimension rows*cols
** double *y - independent
variable: dimension cols
** int *rows,*cols - dimensions of matrix
** double *out_beta - already allocated space to store beta estimates: length cols
** double *out_resids - already allocated space to store residuals: length rows
** double *out_weights - already allocated space to store regression weights: length rows
**
** A wrapper function that allows us to use .C() in R to test the robust
** linear model fitting function (Huber psi, k = 1.345, at most 20
** iterations, no warm start).
**
**********************************************************************************/

void rlm_fit_R(double *x, double *y, int *rows, int *cols, double *out_beta, double *out_resids, double *out_weights){

  rlm_fit(x, y, *rows, *cols, out_beta, out_resids,out_weights,psi_huber,1.345, 20,0);
}

/**********************************************************************************
**
** void rlm_wfit(double *x, double *y, int rows, int cols, double *out_beta, double *out_resids, double *out_weights)
**
** double *x - model matrix: dimension rows*cols
** double *y - independent variable: dimension cols
** int rows,cols - dimensions of matrix
** double *out_beta - already allocated space to store beta estimates: length cols
** double *out_resids - already allocated space to store residuals: length rows
** double *out_weights - already allocated space to store regression weights: length rows
**
** This function fits a robust linear model using M estimation, convergence is
** determined by the change in residuals.  Identical to rlm_fit() except
** that the caller-supplied prior weights w multiply the psi weights on
** every iteration.
**
**********************************************************************************/

void rlm_wfit(double *x, double *y, double *w, int rows, int cols, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

  int i;
  double tol = 1e-7;   /* tolerance passed through to lm_wfit */
  double acc = 1e-4;   /* convergence threshold on irls_delta */
  double scale =0.0;
  double conv;
  int iter;

  double *wts = out_weights;
  double *beta = out_beta;
  double *resids = out_resids;
  double *old_resids = Calloc(rows,double);

  if (!initialized){
    /* initially use the supplied prior weights */
    for (i=0; i < rows; i++){
      wts[i] = w[i]*1.0;
    }

    /* get our initial beta estimates by standard (weighted) regression */
    lm_wfit(x, y, wts, rows, cols, tol, beta, resids);
  }

  for (iter = 0; iter < max_iter; iter++){

    /* MAD-based scale estimate, consistent for the normal distribution */
    scale = med_abs(resids,rows)/0.6745;

    if (fabs(scale) < 1e-10){
      /* scale too small: residuals essentially zero, nothing to refit */
      break;
    }

    for (i =0; i < rows; i++){
      old_resids[i] = resids[i];
    }

    for (i=0; i < rows; i++){
      wts[i] = w[i]*PsiFn(resids[i]/scale,psi_k,0);
    }

    lm_wfit(x, y, wts, rows, cols, tol, beta, resids);

    /* check convergence based on residuals */
    conv = irls_delta(old_resids,resids, rows);

    if (conv < acc){
      break;
    }
  }

  Free(old_resids);
}

/* .C() wrapper for rlm_wfit(): Huber psi, k = 1.345, at most 20
   iterations, no warm start. */
void rlm_wfit_R(double *x, double *y, double *w, int *rows, int *cols, double *out_beta, double *out_resids, double *out_weights){

  rlm_wfit(x, y, w, *rows, *cols, out_beta, out_resids,out_weights,psi_huber,1.345, 20,0);
}
preprocessCore/src/rlm.h0000644000126300012640000000460112127220006016660 0ustar00biocbuildphs_compbio#ifndef RLM_H
#define RLM_H 1

/* General robust linear model fitting (arbitrary model matrix x).
** All fitters use M estimation via IRLS; PsiFn/psi_k select the psi function
** and its tuning constant, max_iter bounds iterations, and initialized != 0
** means the out_* arrays already hold a starting fit. */
void rlm_fit(double *x, double *y, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized);
void rlm_wfit(double *x, double *y, double *w, int rows, int cols, double *out_beta, double *out_resids, double *out_weights, double (* PsiFn)(double, double, int), double psi_k, int max_iter,int initialized);

/* Helpers: median absolute value, and the relative change in residuals
** used as the IRLS convergence criterion. */
double med_abs(double *x, int length);
double irls_delta(double *old, double *new, int length);

/* Row + column (probes + chips) ANOVA-style fitters.
** The *_scale variants accept/return a scale estimate through *scale. */
void rlm_fit_anova(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void rlm_fit_anova_scale(double *y, int y_rows, int y_cols, double *scale, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void rlm_wfit_anova(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void rlm_wfit_anova_scale(double *y, int y_rows, int y_cols, double *scale,double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);

/* Column-only fitters for the case where probe effects are already known. */
void rlm_fit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void rlm_wfit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void rlm_wfit_anova_given_probe_effects_scale(double *y, int y_rows, int y_cols, double *scale, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);
void rlm_fit_anova_given_probe_effects_scale(double *y, int y_rows, int y_cols, double *input_scale, double *probe_effects, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized);

#endif
preprocessCore/src/rlm_anova.c0000644000126300012640000012201412127220006020044 0ustar00biocbuildphs_compbio/*********************************************************************
 **
 ** file: rlm_anova.c
 **
 ** Aim: implement robust linear models specialized to samples + probes model.
 **
 ** Copyright (C) 2004-2009 Ben Bolstad
 **
 ** created by: B. M. Bolstad
 **
 ** created on: June 23, 2003
 **
 ** Last modified: June 23, 2003
 **
 ** History:
 ** July 29, 2004 - change routines so output order is the same as
 **                 in new structure.
 ** Mar 1, 2006 - change comment style to ansi
 ** Apr 10, 2007 - add rlm_wfit_anova
 ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation
 ** Mar 9.
2008 - Add rlm_fit_anova_given_probeeffects ** Mar 10, 2008 - make rlm_fit_anova_given_probeeffects etc purely single chip ** Mar 12, 2008 - Add rlm_wfit_anova_given_probeeffects ** Nov 1, 2008 - modify rlm_fit_anova_rlm_compute_se_anova() so that se of constrained probe effect (last one) is returned) ** Apr 23, 2009 - Allow scale estimate to be specified or returned in rlm_fit_anova ** Apr 24, 2009 - Allow scale estimate to be specified or returned in rlm_wfit_anova, rlm_fit_anova_given_probe_effects ** Apr 29, 2009 - Ensure that compute scale corresponds to final computed scale estimate ** *********************************************************************/ #include "psi_fns.h" #include "matrix_functions.h" #include "rlm.h" #include "rlm_se.h" #include #include #include #include #include #include #include #include static void XTWY(int y_rows, int y_cols, double *wts,double *y, double *xtwy){ int i,j; /* sweep columns (ie chip effects) */ for (j=0; j < y_cols; j++){ xtwy[j] = 0.0; for (i=0; i < y_rows; i++){ xtwy[j] += wts[j*y_rows + i]* y[j*y_rows + i]; } } /* sweep rows (ie probe effects) */ for (i=0; i < y_rows; i++){ xtwy[i+y_cols] = 0.0; for (j=0; j < y_cols; j++){ xtwy[i+y_cols] += wts[j*y_rows + i]* y[j*y_rows + i]; } } for (i=0; i < y_rows-1; i++){ xtwy[i+y_cols] = xtwy[i+y_cols] - xtwy[y_cols+y_rows-1]; } } /********************************************************************************** ** ** This is for testing the XTWY() function from R using .C() ** *********************************************************************************/ void XTWY_R(int *rows, int *cols, double *out_weights, double *y,double *xtwy){ XTWY(*rows, *cols, out_weights,y,xtwy); } /*************** This is R testing code for my own purposes library(AffyExtensions) data(Dilution) y <- pm(Dilution)[1:16,] .C("XTWY_R",as.integer(16),as.integer(4),as.double(rep(1,64)),as.double(as.vector(log2(y))),double(100)) probes <- rep(1:16,4) samples <- rep(1:4,c(rep(16,4))) X <- 
model.matrix(~-1 + as.factor(samples) + C(as.factor(probes),"contr.sum"))

t(X)%*%as.vector(log2(y))

****************/

/**********************************************************************
 **
 ** static void XTWX(int y_rows, int y_cols, double *wts, double *xtwx)
 **
 ** Accumulates X^T W X for the samples + probes design into xtwx, a
 ** (y_cols+y_rows-1) x (y_cols+y_rows-1) matrix (column-major).
 ** NOTE: entries are added with +=, so the caller must zero xtwx first
 ** (the callers in this file memset it before each use).
 **
 *********************************************************************/

static void XTWX(int y_rows, int y_cols, double *wts, double *xtwx){

  int Msize = y_cols +y_rows-1;
  int i,j,k;

  /* diagonal elements of first part of matrix ie upper partition */
  for (j =0; j < y_cols;j++){
    for (i=0; i < y_rows; i++){
      xtwx[j*Msize + j]+=wts[j*y_rows + i];
    }
  }

  /* diagonal portion of lower partition matrix: diagonal elements */
  for (j =0; j < y_cols;j++){
    for (i = 0; i < y_rows-1;i++){
      xtwx[(y_cols +i)*Msize + (y_cols +i)]+= wts[j*y_rows + i];
    }
  }

  /* diagonal portion of lower partition matrix: off diagonal elements
  ** (the shared contribution comes from the constrained last probe) */
  for (j =0; j < y_cols;j++){
    for (i = 0; i < y_rows-1;i++){
      for (k=i ; k < y_rows-1;k++){
	xtwx[(y_cols +k)*Msize + (y_cols +i)] = xtwx[(y_cols +i)*Msize + (y_cols +k)]+= wts[j*y_rows + (y_rows-1)];
      }
    }
  }

  /* the two other portions of the matrix */
  for (j =0; j < y_cols;j++){
    for (i= 0; i < y_rows-1;i++){
      xtwx[j*Msize + (y_cols + i)] = xtwx[(y_cols + i)*Msize + j] = wts[j*y_rows + i] - wts[j*y_rows + (y_rows-1)];
    }
  }
}

/**********************************************************************************
 **
 ** This is for testing the XTWX from R using .C()
 **
 *********************************************************************************/

void XTWX_R(int *rows, int *cols, double *out_weights, double *xtwx){
  XTWX(*rows, *cols, out_weights,xtwx);
}

/*************** This is R test code

.C("XTWX_R",as.integer(16),as.integer(4),rep(1,64))

*************/

/**********************************************************************
 **
 ** static void XTWXinv(int y_rows, int y_cols, double *xtwx)
 **
 ** Inverts X^T W X in place, exploiting the block structure
 **   [ P  Q ]
 **   [ R  S ]
 ** where P (y_cols x y_cols, the chip block) is diagonal (see the R test
 ** code below, which names the same partitions).  The Schur complement
 ** S - R P^{-1} Q is inverted via Choleski_inverse() and the standard
 ** block-inverse formulas rebuild the full inverse in xtwx.
 **
 *********************************************************************/

static void XTWXinv(int y_rows, int y_cols,double *xtwx){
  int i,j,k;
  int Msize = y_cols +y_rows-1;
  double *P= Calloc(y_cols,double);
  double *RP = Calloc(y_cols*(y_rows-1),double);
  double *RPQ = Calloc((y_rows-1)*(y_rows-1),double);
  double *S = Calloc((y_rows-1)*(y_rows-1),double);
  double *work = Calloc((y_rows-1)*(y_rows-1),double);

  /* RP = R P^{-1} (P is diagonal, so this is a column scaling) */
  for (j=0;j < y_cols;j++){
    for (i=0; i < y_rows -1; i++){
      RP[j*(y_rows-1) + i] = xtwx[j*Msize + (y_cols + i)]*(1.0/xtwx[j*Msize+j]);
    }
  }

  /* RPQ = R P^{-1} Q (symmetric; fill both triangles) */
  for (i=0; i < y_rows -1; i++){
    for (j=i;j < y_rows -1; j++){
      for (k=0; k < y_cols;k++){
	RPQ[j*(y_rows-1) + i] +=  RP[k*(y_rows-1) + j]*xtwx[k*Msize + (y_cols + i)];
      }
      RPQ[i*(y_rows-1) + j] = RPQ[j*(y_rows-1) + i];
    }
  }

  /* RPQ <- S - R P^{-1} Q  (the Schur complement of P) */
  for (j=0; j < y_rows-1;j++){
    for (i=j; i < y_rows-1;i++){
      RPQ[i*(y_rows-1) + j] = RPQ[j*(y_rows-1)+i] =  xtwx[(y_cols + j)*Msize + (y_cols + i)] - RPQ[j*(y_rows-1) + i];
    }
  }

  /* (debug printing of RP, RPQ, S removed; it was commented out) */

  /* Lets start making the inverse */
  Choleski_inverse(RPQ, S, work, y_rows-1, 0);

  /* off-diagonal blocks of the inverse: -(Schur^{-1}) (R P^{-1}) and its transpose */
  for (j=0; j< y_cols;j++){
    for (i=0; i < y_rows -1; i++){
      xtwx[j*Msize + (y_cols + i)] = 0.0;
      for (k=0; k < y_rows -1; k++){
	xtwx[j*Msize + (y_cols + i)]+= -1.0*(S[i*(y_rows-1) + k])*RP[j*(y_rows-1) + k];
      }
      xtwx[(y_cols + i)*Msize + j]=xtwx[j*Msize + (y_cols + i)];
    }
  }

  for (j=0;j < y_cols;j++){
    P[j] = 1.0/xtwx[j*Msize+j];
  }

  /* upper-left block of the inverse: P^{-1} + (R P^{-1})^T Schur^{-1} (R P^{-1}) */
  for (j=0; j < y_cols; j++){
    for (i=j; i < y_cols;i++){
      xtwx[i*Msize + j]=0.0;
      for (k=0;k < y_rows-1; k++){
	xtwx[i*Msize + j]+= RP[i*(y_rows-1) + k]*xtwx[j*Msize + (y_cols + k)];
      }
      xtwx[i*Msize + j]*=-1.0;
      xtwx[j*Msize + i] = xtwx[i*Msize + j];
    }
    xtwx[j*Msize + j]+=P[j];
  }

  /* lower-right block of the inverse is the inverted Schur complement */
  for (j=0; j < y_rows-1;j++){
    for (i=0; i < y_rows-1;i++){
      xtwx[(y_cols + j)*Msize + (y_cols + i)] = S[j*(y_rows-1)+i];
    }
  }

  Free(P);
  Free(work);
  Free(RP);
  Free(RPQ);
  Free(S);
}

/**********************************************************************************
 **
 ** This is for testing the XTWXinv from R
 **
 *********************************************************************************/

void XTWX_R_inv(int *rows, int *cols, double *xtwx){
  XTWXinv(*rows, *cols, xtwx);
}

/*************** This is R testing code for my own purposes

library(AffyExtensions)

probes <- rep(1:16,4)
samples
<- rep(1:4,c(rep(16,4)))
X <- model.matrix(~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum"))
W <- diag(seq(0.05,1,length=64))

solve(t(X)%*%W%*%X) - matrix(.C("XTWX_R_inv",as.integer(16),as.integer(4),as.double(t(X)%*%W%*%X))[[3]],19,19)
matrix(.C("XTWX_R_inv",as.integer(16),as.integer(4),as.double(t(X)%*%W%*%X))[[3]],19,19)[1:4,5:19]

XTWX <- t(X)%*%W%*%X
R <- XTWX[5:19,1:4]
P <- XTWX[1:4,1:4]
Q <- t(R)
S <- XTWX [5:19,5:19]

R%*%solve(P)

probes <- rep(1:16,100)
samples <- rep(1:100,c(rep(16,100)))
X <- model.matrix(~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum"))
W <- diag(seq(0.05,1,length=1600))
system.time(matrix(.C("XTWX_R_inv",as.integer(16),as.integer(100),as.double(t(X)%*%W%*%X))[[3]],115,115))

*************/

/**********************************************************************
 **
 ** static void rlm_fit_anova_engine(double *y, int y_rows, int y_cols,
 **          double *input_scale, double *out_beta, double *out_resids,
 **          double *out_weights, double (* PsiFn)(double, double, int),
 **          double psi_k, int max_iter, int initialized)
 **
 ** Core IRLS fitter for the row + column (probes + chips) model with a
 ** sum-to-zero constraint on the probe effects.
 **
 ** y            - data matrix: y_rows*y_cols, column-major
 ** input_scale  - in: if *input_scale < 0 the scale is (re-)estimated each
 **                iteration as MAD(resids)/0.6745, otherwise the supplied
 **                value is used as a fixed scale; out: input_scale[0] holds
 **                the scale corresponding to the final residuals
 ** out_beta     - length y_rows+y_cols-1: chip effects first, then the
 **                y_rows-1 unconstrained probe effects
 ** out_resids, out_weights - y_rows*y_cols each, column-major
 ** initialized  - if non-zero, out_weights already holds starting weights
 **
 ** The starting values come from weighted column sweeps (chip effects)
 ** followed by weighted row sweeps (probe effects) of y.
 **
 *********************************************************************/

static void rlm_fit_anova_engine(double *y, int y_rows, int y_cols, double *input_scale, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

  int i,j,iter;
  double acc = 1e-4;   /* convergence threshold on irls_delta */
  double scale =0.0;
  double conv;
  double endprobe;

  double *wts = out_weights;
  double *resids = out_resids;
  double *old_resids = Calloc(y_rows*y_cols,double);
  double *rowmeans = Calloc(y_rows,double);

  double *xtwx = Calloc((y_rows+y_cols-1)*(y_rows+y_cols-1),double);
  double *xtwy = Calloc((y_rows+y_cols),double);

  /* NOTE(review): rows is declared double but used as a loop bound and
  ** passed where a length is expected; it holds y_rows*y_cols. */
  double sumweights, rows;

  rows = y_rows*y_cols;

  if (!initialized){
    /* initially use equal weights */
    for (i=0; i < rows; i++){
      wts[i] = 1.0;
    }
  }

  /* starting matrix */
  for (i=0; i < y_rows; i++){
    for (j=0; j < y_cols; j++){
      resids[j*y_rows + i] = y[j*y_rows + i];
    }
  }

  /* sweep columns (ie chip effects) */
  for (j=0; j < y_cols; j++){
    out_beta[j] = 0.0;
    sumweights = 0.0;
    for (i=0; i < y_rows; i++){
      out_beta[j] += wts[j*y_rows + i]* resids[j*y_rows + i];
      sumweights +=  wts[j*y_rows + i];
    }
    out_beta[j]/=sumweights;
    for (i=0; i < y_rows; i++){
      resids[j*y_rows + i] =  resids[j*y_rows + i] -  out_beta[j];
    }
  }

  /* sweep rows  (ie probe effects) */
  for (i=0; i < y_rows; i++){
    rowmeans[i] = 0.0;
    sumweights = 0.0;
    for (j=0; j < y_cols; j++){
      rowmeans[i] += wts[j*y_rows + i]* resids[j*y_rows + i];
      sumweights +=  wts[j*y_rows + i];
    }
    rowmeans[i]/=sumweights;
    for (j=0; j < y_cols; j++){
      resids[j*y_rows + i] =  resids[j*y_rows + i] -  rowmeans[i];
    }
  }
  for (i=0; i < y_rows-1; i++){
    out_beta[i+y_cols] = rowmeans[i];
  }

  for (iter = 0; iter < max_iter; iter++){

    /* scale: re-estimated from residuals unless a fixed value was supplied */
    if (*input_scale < 0){
      scale = med_abs(resids,rows)/0.6745;
    } else {
      scale = *input_scale;
    }

    if (fabs(scale) < 1e-10){
      break;
    }

    for (i =0; i < rows; i++){
      old_resids[i] = resids[i];
    }

    for (i=0; i < rows; i++){
      wts[i] = PsiFn(resids[i]/scale,psi_k,0);
    }

    /* weighted least squares */
    memset(xtwx,0,(y_rows+y_cols-1)*(y_rows+y_cols-1)*sizeof(double));

    XTWX(y_rows,y_cols,wts,xtwx);
    XTWXinv(y_rows, y_cols,xtwx);
    XTWY(y_rows, y_cols, wts,y, xtwy);

    /* beta = (X^T W X)^{-1} X^T W y */
    for (i=0;i < y_rows+y_cols-1; i++){
      out_beta[i] = 0.0;
      for (j=0;j < y_rows+y_cols -1; j++){
	out_beta[i] += xtwx[j*(y_rows+y_cols -1)+i]*xtwy[j];
      }
    }

    /* residuals; the last probe's effect is minus the sum of the others
    ** because of the sum-to-zero constraint */
    for (i=0; i < y_rows-1; i++){
      for (j=0; j < y_cols; j++){
	resids[j*y_rows +i] = y[j*y_rows + i]- (out_beta[j] + out_beta[i + y_cols]);
      }
    }

    for (j=0; j < y_cols; j++){
      endprobe=0.0;
      for (i=0; i < y_rows-1; i++){
	endprobe+= out_beta[i + y_cols];
      }
      resids[j*y_rows + y_rows-1] = y[j*y_rows + y_rows-1]- (out_beta[j] - endprobe);
    }

    /* check convergence based on residuals */
    conv = irls_delta(old_resids,resids, rows);
    if (conv < acc){
      break;
    }
  }

  /* recompute the scale so that the returned value corresponds to the
  ** final residuals (see Apr 29, 2009 history entry) */
  if (*input_scale < 0){
    scale = med_abs(resids,rows)/0.6745;
  } else {
    scale = *input_scale;
  }

  Free(xtwx);
  Free(xtwy);
  Free(old_resids);
  Free(rowmeans);

  input_scale[0] = scale;
}

/* Fit with a caller-supplied (or caller-returned) scale estimate in *scale. */
void rlm_fit_anova_scale(double *y, int y_rows, int y_cols,double *scale, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

  rlm_fit_anova_engine(y, y_rows, y_cols, scale, out_beta, out_resids, out_weights,PsiFn, psi_k, max_iter, initialized);
}

/**********************************************************************************
 **
 ** void rlm_fit_anova(double *y, int rows, int cols,double *out_beta,
 **                double *out_resids, double *out_weights,
 **                double (* PsiFn)(double, double, int), double psi_k,int max_iter,
 **                int initialized))
 **
 ** double *y - matrix of response variables (stored by column, with rows probes, columns chips
 ** int rows - dimensions of y
 ** int cols - dimensions of y
 **
 ** specializes procedure so decomposes matrix more efficiently
 ** note that routine is not as numerically stable as above.
 **
 ** fits a row + columns model; the scale is estimated internally
 ** (the -1.0 sentinel tells the engine to use MAD(resids)/0.6745).
 **
 **********************************************************************************/

void rlm_fit_anova(double *y, int y_rows, int y_cols,double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){

  double scale = -1.0;

  rlm_fit_anova_engine(y, y_rows, y_cols, &scale, out_beta, out_resids, out_weights,PsiFn, psi_k, max_iter, initialized);
}

/**********************************************************************************
 **
 ** This is for testing the rlm_fit_anova function from R
 **
 *********************************************************************************/

void rlm_fit_anova_R(double *y, int *rows, int *cols, double *out_beta, double *out_resids, double *out_weights, int *its){

  rlm_fit_anova(y, *rows, *cols, out_beta, out_resids,out_weights,psi_huber,1.345, *its,0);
}

/* This is testing code for my own use.
library(AffyExtensions) data(Dilution) y <- pm(Dilution)[1:16,] .C("rlm_fit_anova_R",as.double(log2(y)),as.integer(16),as.integer(4),double(20),double(64),double(64),as.integer(1)) probes <- rep(1:16,4) samples <- rep(1:4,c(rep(16,4))) library(MASS) rlm(as.vector(log2(y)) ~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum"),maxit=20)$weights # apply(matrix(rlm(as.vector(y) ~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum"),maxit=1)$w,ncol=4)*y,2,sum)/apply(matrix(rlm(as.vector(y) ~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum"),maxit=1)$w,ncol=4),2,sum) abs(resid(rlm(as.vector(log2(y)) ~ -1 + as.factor(samples) + C(as.factor(probes),"contr.sum"),maxit=1))- .C("rlm_fit_anova_R",as.double(log2(y)),as.integer(16),as.integer(4),double(20),double(64),double(64),as.integer(1))[[5]]) > 10^-6 */ void rlm_wfit_anova_engine(double *y, int y_rows, int y_cols, double *input_scale, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ int i,j,iter; /* double tol = 1e-7; */ double acc = 1e-4; double scale =0.0; double conv; double endprobe; double *wts = out_weights; double *resids = out_resids; double *old_resids = Calloc(y_rows*y_cols,double); double *rowmeans = Calloc(y_rows,double); double *xtwx = Calloc((y_rows+y_cols-1)*(y_rows+y_cols-1),double); double *xtwy = Calloc((y_rows+y_cols),double); double sumweights, rows; rows = y_rows*y_cols; if (!initialized){ /* intially use equal weights */ for (i=0; i < rows; i++){ wts[i] = w[i]*1.0; } } /* starting matrix */ for (i=0; i < y_rows; i++){ for (j=0; j < y_cols; j++){ resids[j*y_rows + i] = y[j*y_rows + i]; } } /* sweep columns (ie chip effects) */ for (j=0; j < y_cols; j++){ out_beta[j] = 0.0; sumweights = 0.0; for (i=0; i < y_rows; i++){ out_beta[j] += wts[j*y_rows + i]* resids[j*y_rows + i]; sumweights += wts[j*y_rows + i]; } out_beta[j]/=sumweights; for (i=0; i < y_rows; i++){ resids[j*y_rows + 
i] = resids[j*y_rows + i] - out_beta[j]; } } /* sweep rows (ie probe effects) */ for (i=0; i < y_rows; i++){ rowmeans[i] = 0.0; sumweights = 0.0; for (j=0; j < y_cols; j++){ rowmeans[i] += wts[j*y_rows + i]* resids[j*y_rows + i]; sumweights += wts[j*y_rows + i]; } rowmeans[i]/=sumweights; for (j=0; j < y_cols; j++){ resids[j*y_rows + i] = resids[j*y_rows + i] - rowmeans[i]; } } for (i=0; i < y_rows-1; i++){ out_beta[i+y_cols] = rowmeans[i]; } for (iter = 0; iter < max_iter; iter++){ if (*input_scale < 0){ scale = med_abs(resids,rows)/0.6745; } else { scale = *input_scale; } if (fabs(scale) < 1e-10){ /*printf("Scale too small \n"); */ break; } for (i =0; i < rows; i++){ old_resids[i] = resids[i]; } for (i=0; i < rows; i++){ wts[i] = w[i]*PsiFn(resids[i]/scale,psi_k,0); /* psi_huber(resids[i]/scale,k,0); */ } /* printf("%f\n",scale); */ /* weighted least squares */ memset(xtwx,0,(y_rows+y_cols-1)*(y_rows+y_cols-1)*sizeof(double)); XTWX(y_rows,y_cols,wts,xtwx); XTWXinv(y_rows, y_cols,xtwx); XTWY(y_rows, y_cols, wts,y, xtwy); for (i=0;i < y_rows+y_cols-1; i++){ out_beta[i] = 0.0; for (j=0;j < y_rows+y_cols -1; j++){ out_beta[i] += xtwx[j*(y_rows+y_cols -1)+i]*xtwy[j]; } } /* residuals */ for (i=0; i < y_rows-1; i++){ for (j=0; j < y_cols; j++){ resids[j*y_rows +i] = y[j*y_rows + i]- (out_beta[j] + out_beta[i + y_cols]); } } for (j=0; j < y_cols; j++){ endprobe=0.0; for (i=0; i < y_rows-1; i++){ endprobe+= out_beta[i + y_cols]; } resids[j*y_rows + y_rows-1] = y[j*y_rows + y_rows-1]- (out_beta[j] - endprobe); } /*check convergence based on residuals */ conv = irls_delta(old_resids,resids, rows); if (conv < acc){ /* printf("Converged \n");*/ break; } } if (*input_scale < 0){ scale = med_abs(resids,rows)/0.6745; } else { scale = *input_scale; } Free(xtwx); Free(xtwy); Free(old_resids); Free(rowmeans); input_scale[0] = scale; } void rlm_wfit_anova(double *y, int y_rows, int y_cols, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, 
double, int), double psi_k,int max_iter, int initialized){ double scale = -1.0; rlm_wfit_anova_engine(y, y_rows, y_cols, &scale, w, out_beta, out_resids, out_weights, PsiFn , psi_k, max_iter, initialized); } void rlm_wfit_anova_scale(double *y, int y_rows, int y_cols,double *scale, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ rlm_wfit_anova_engine(y, y_rows, y_cols, scale, w, out_beta, out_resids, out_weights,PsiFn, psi_k, max_iter, initialized); } /************************************************************************* ** ** void RLM_SE_Method_1_anova(double residvar, double *XTX, int p, double *se_estimates) ** ** double residvar - residual variance estimate ** double *XTX - t(Design matrix)%*% Design Matrix ** double p - number of parameters ** double *se_estimates - on output contains standard error estimates for each of ** the parametes ** ** this function computes the parameter standard errors using the first ** method described in Huber (1981) ** ** ie k^2 (sum psi^2/(n-p))/(sum psi'/n)^2 *(XtX)^(-1) ** ** ************************************************************************/ static void RLM_SE_Method_1_anova(double residvar, double *XTX, int y_rows,int y_cols, double *se_estimates,double *varcov){ int i,j; int p = y_rows + y_cols -1; XTWXinv(y_rows, y_cols,XTX); for (i =0; i < p; i++){ se_estimates[i] = sqrt(residvar*XTX[i*p + i]); } /*** for (i =0; i < y_rows-1; i++){ se_estimates[i] = sqrt(residvar*XTX[(i+y_cols)*p + (i+y_cols)]); } for (i =0; i < y_cols; i++){ se_estimates[i+(y_rows -1)] = sqrt(residvar*XTX[i*p + i]); } ***/ if (varcov != NULL) for (i =0; i < p; i++){ for (j = i; j < p; j++){ varcov[j*p +i]= residvar*XTX[j*p +i]; } } /*** if (varcov != NULL){ // copy across varcov matrix in right order for (i = 0; i < y_rows-1; i++) for (j = i; j < y_rows-1; j++) varcov[j*p + i] = residvar*XTX[(j+y_cols)*p + (i+y_cols)]; for (i = 0; i < 
y_cols; i++) for (j = i; j < y_cols; j++) varcov[(j+(y_rows-1))*p + (i+(y_rows -1))] = residvar*XTX[j*p + i]; for (i = 0; i < y_cols; i++) for (j = y_cols; j < p; j++) varcov[(i+ y_rows -1)*p + (j - y_cols)] = residvar*XTX[j*p + i]; } **/ } /************************************************************************* ** ** void RLM_SE_Method_2(double residvar, double *W, int p, double *se_estimates) ** ** double residvar - residual variance estimate ** double *XTX - t(Design matrix)%*% Design Matrix ** double p - number of parameters ** double *se_estimates - on output contains standard error estimates for each of ** the parametes ** ** this function computes the parameter standard errors using the second ** method described in Huber (1981) ** ** ie K*(sum psi^2/(n-p))/(sum psi'/n) *(W)^(-1) ** ** ************************************************************************/ static void RLM_SE_Method_2_anova(double residvar, double *W, int y_rows,int y_cols, double *se_estimates,double *varcov){ int i,j; /* l,k; */ int p = y_rows + y_cols -1; double *Winv = Calloc(p*p,double); double *work = Calloc(p*p,double); if (!Choleski_inverse(W,Winv,work,p,1)){ for (i =0; i < p; i++){ se_estimates[i] = sqrt(residvar*Winv[i*p + i]); /* printf("%f ", se_estimates[i]); */ } /*for (i =0; i < y_rows-1; i++){ se_estimates[i] = sqrt(residvar*Winv[(i+y_cols)*p + (i+y_cols)]); } for (i =0; i < y_cols; i++){ se_estimates[i+(y_rows -1)] = sqrt(residvar*Winv[i*p + i]); } */ } else { /* printf("Using a G-inverse\n"); */ SVD_inverse(W, Winv,p); for (i =0; i < p; i++){ se_estimates[i] = sqrt(residvar*Winv[i*p + i]); /* printf("%f ", se_estimates[i]); */ } /* for (i =0; i < y_rows-1; i++){ se_estimates[i] = sqrt(residvar*Winv[(i+y_cols)*p + (i+y_cols)]); } for (i =0; i < y_cols; i++){ se_estimates[i+(y_rows -1)] = sqrt(residvar*Winv[i*p + i]); } */ } if (varcov != NULL) for (i =0; i < p; i++){ for (j = i; j < p; j++){ varcov[j*p +i]= residvar*Winv[j*p +i]; } } /** if (varcov != NULL){ copy across 
varcov matrix in right order for (i = 0; i < y_rows-1; i++) for (j = i; j < y_rows-1; j++) varcov[j*p + i] = residvar*Winv[(j+y_cols)*p + (i+y_cols)]; for (i = 0; i < y_cols; i++) for (j = i; j < y_cols; j++) varcov[(j+(y_rows-1))*p + (i+(y_rows -1))] = residvar*Winv[j*p + i]; for (i = 0; i < y_cols; i++) for (j = y_cols; j < p; j++) varcov[(i+ y_rows -1)*p + (j - y_cols)] = residvar*Winv[j*p + i]; } **/ Free(work); Free(Winv); } /************************************************************************* ** ** void RLM_SE_Method_3(double residvar, double *XTX, double *W, int p, double *se_estimates) ** ** double residvar - residual variance estimate ** double *XTX - t(Design matrix)%*% Design Matrix ** double p - number of parameters ** double *se_estimates - on output contains standard error estimates for each of ** the parametes ** ** this function computes the parameter standard errors using the third ** method described in Huber (1981) ** ** ie 1/(K)*(sum psi^2/(n-p))*(W)^(-1)(XtX)W^(-1) ** ** ************************************************************************/ static int RLM_SE_Method_3_anova(double residvar, double *XTX, double *W, int y_rows,int y_cols, double *se_estimates,double *varcov){ int i,j,k; /* l; */ int rv; int p = y_rows + y_cols -1; double *Winv = Calloc(p*p,double); double *work = Calloc(p*p,double); /***************** double *Wcopy = Calloc(p*p,double); for (i=0; i 1){ XTWXinv(y_rows, y_cols,XTX); } else { for (i=0; i < p; i++){ XTX[i*p + i] = 1.0/XTX[i*p + i]; } } /* make sure in right order for (i =0; i < y_rows-1; i++){ se_estimates[i] = RMSEw*sqrt(XTX[(i+y_cols)*p + (i+y_cols)]); } for (i =0; i < y_cols; i++){ se_estimates[i+(y_rows -1)] = RMSEw*sqrt(XTX[i*p + i]); } */ for (i =0; i < p; i++){ se_estimates[i] = RMSEw*sqrt(XTX[i*p + i]); } if (varcov != NULL) for (i = 0; i < p; i++) for (j = i; j < p; j++) varcov[j*p + i] = RMSEw*RMSEw*XTX[j*p + i]; se_estimates[p] = 0.0; for (i=y_cols; i < p; i++) for (j = y_cols; j < p; j++) 
se_estimates[p]+= -1*RMSEw*RMSEw*XTX[j*p + i]; se_estimates[p] = sqrt(-1*se_estimates[p]); /* if (varcov != NULL){ copy across varcov matrix in right order for (i = 0; i < y_rows-1; i++) for (j = i; j < y_rows-1; j++) varcov[j*p + i] = RMSEw*RMSEw*XTX[(j+y_cols)*p + (i+y_cols)]; for (i = 0; i < y_cols; i++) for (j = i; j < y_cols; j++) varcov[(j+(y_rows-1))*p + (i+(y_rows -1))] = RMSEw*RMSEw*XTX[j*p + i]; for (i = 0; i < y_cols; i++) for (j = y_cols; j < p; j++) varcov[(i+ y_rows -1)*p + (j - y_cols)] = RMSEw*RMSEw*XTX[j*p + i]; } */ } else { scale = med_abs(resids,n)/0.6745; residSE[0] = scale; /* compute most of what we will need to do each of the different standard error methods */ for (i =0; i < n; i++){ sumpsi2+= PsiFn(resids[i]/scale,k1,2)*PsiFn(resids[i]/scale,k1,2); /* sumpsi += psi_huber(resids[i]/scale,k1,2); */ sumderivpsi+= PsiFn(resids[i]/scale,k1,1); } m = (sumderivpsi/(double) n); for (i = 0; i < n; i++){ varderivpsi+=(PsiFn(resids[i]/scale,k1,1) - m)*(PsiFn(resids[i]/scale,k1,1) - m); } varderivpsi/=(double)(n); /* Kappa = 1.0 + (double)p/(double)n * (1.0-m)/(m); */ Kappa = 1.0 + ((double)p/(double)n) *varderivpsi/(m*m); /* prepare XtX and W matrices */ for (i=0; i < n; i++){ W_tmp[i] = 1.0; } XTWX(y_rows,y_cols,W_tmp,XTX); for (i=0; i < n; i++){ W_tmp[i] = PsiFn(resids[i]/scale,k1,1); } XTWX(y_rows,y_cols,W_tmp,W); if (method==1) { Kappa = Kappa*Kappa; vs = scale*scale*sumpsi2/(double)(n-p); Kappa = Kappa*vs/(m*m); RLM_SE_Method_1_anova(Kappa, XTX, y_rows,y_cols, se_estimates,varcov); } else if (method==2){ vs = scale*scale*sumpsi2/(double)(n-p); Kappa = Kappa*vs/m; RLM_SE_Method_2_anova(Kappa, W, y_rows,y_cols, se_estimates,varcov); } else if (method==3){ vs = scale*scale*sumpsi2/(double)(n-p); Kappa = 1.0/Kappa*vs; i = RLM_SE_Method_3_anova(Kappa, XTX, W, y_rows,y_cols, se_estimates,varcov); if (i){ for (i=0; i 1){ colonly_XTWXinv(y_rows, y_cols,XTX); } else { for (i=0; i < p; i++){ XTX[i*p + i] = 1.0/XTX[i*p + i]; } } for (i =0; i < p; i++){ 
se_estimates[i] = RMSEw*sqrt(XTX[i*p + i]); } if (varcov != NULL) for (i = 0; i < p; i++) for (j = i; j < p; j++) varcov[j*p + i] = RMSEw*RMSEw*XTX[j*p + i]; } */ /* the new single chip code */ colonly_XTWX(y_rows,y_cols,weights,XTX); if (y_rows > 1){ colonly_XTWXinv(y_rows, y_cols,XTX); } else { for (i=0; i < p; i++){ XTX[i*p + i] = 1.0/XTX[i*p + i]; } } for (j=0; j < y_cols; j++){ RMSEw = 0.0; for (i=0; i < y_rows; i++){ RMSEw+= weights[j*y_rows + i]*resids[j*y_rows + i]*resids[j*y_rows + i]; } RMSEw = sqrt(RMSEw/(double)(y_rows-1)); se_estimates[j] = RMSEw*sqrt(XTX[j*p + j]); } Free(W_tmp); Free(work); Free(XTX); Free(W); } void rlm_wfit_anova_given_probe_effects_engine(double *y, int y_rows, int y_cols, double *input_scale, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ int i,j,iter; /* double tol = 1e-7; */ double acc = 1e-4; double *scale =Calloc((y_cols),double); double conv; double endprobe; double *wts = out_weights; double *resids = out_resids; double *old_resids = Calloc(y_rows*y_cols,double); double *rowmeans = Calloc(y_rows,double); double *xtwx = Calloc((y_cols)*(y_cols),double); double *xtwy = Calloc((y_cols),double); double sumweights, rows; rows = y_rows*y_cols; if (!initialized){ /* intially use equal weights */ for (i=0; i < rows; i++){ wts[i] = w[i]*1.0; } } /* starting matrix */ for (i=0; i < y_rows; i++){ for (j=0; j < y_cols; j++){ resids[j*y_rows + i] = y[j*y_rows + i] - probe_effects[i]; } } /* sweep columns (ie chip effects) */ for (j=0; j < y_cols; j++){ out_beta[j] = 0.0; sumweights = 0.0; for (i=0; i < y_rows; i++){ out_beta[j] += wts[j*y_rows + i]* resids[j*y_rows + i]; sumweights += wts[j*y_rows + i]; } out_beta[j]/=sumweights; for (i=0; i < y_rows; i++){ resids[j*y_rows + i] = resids[j*y_rows + i] - out_beta[j]; } } for (iter = 0; iter < max_iter; iter++){ /* The new single-chip code */ for (i =0; i < rows; 
i++){ old_resids[i] = resids[i]; } for (j = 0; j < y_cols; j++){ if (input_scale[j] < 0.0){ scale[j] = med_abs(&resids[j*y_rows],y_rows)/0.6745; } else { scale[j] = input_scale[j]; } for (i=0; i < y_rows; i++){ if (fabs(scale[j]) < 1e-10){ break; } wts[j*y_rows + i] = w[j*y_rows + i]*PsiFn(resids[j*y_rows + i]/scale[j],psi_k,0); } } /* printf("%f\n",scale); */ /* weighted least squares */ memset(xtwx,0,(y_cols)*(y_cols)*sizeof(double)); colonly_XTWX(y_rows,y_cols,wts,xtwx); colonly_XTWXinv(y_rows, y_cols,xtwx); colonly_XTWY(y_rows, y_cols, wts,y, xtwy); for (i=0;i < y_cols; i++){ out_beta[i] = 0.0; for (j=0;j < y_cols; j++){ out_beta[i] += xtwx[j*y_cols + i]*xtwy[j]; } } /* residuals */ for (i=0; i < y_rows; i++){ for (j=0; j < y_cols; j++){ resids[j*y_rows +i] = y[j*y_rows + i] - probe_effects[i] - (out_beta[j]); } } /*check convergence based on residuals */ conv = irls_delta(old_resids,resids, rows); if (conv < acc){ /* printf("Converged \n");*/ break; } } for (j = 0; j < y_cols; j++){ if (input_scale[j] < 0.0){ scale[j] = med_abs(&resids[j*y_rows],y_rows)/0.6745; } else { scale[j] = input_scale[j]; } } Free(xtwx); Free(xtwy); Free(old_resids); Free(rowmeans); for (j = 0; j < y_cols; j++){ input_scale[j] = scale[j]; } Free(scale); } void rlm_wfit_anova_given_probe_effects(double *y, int y_rows, int y_cols, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), double psi_k,int max_iter, int initialized){ double *scale=Calloc((y_cols),double); int j; for (j=0; j < y_cols; j++){ scale[j] = -1.0; } rlm_wfit_anova_given_probe_effects_engine(y, y_rows, y_cols, scale, probe_effects, w, out_beta, out_resids, out_weights, PsiFn, psi_k, max_iter, initialized); Free(scale); } void rlm_wfit_anova_given_probe_effects_scale(double *y, int y_rows, int y_cols, double *scale, double *probe_effects, double *w, double *out_beta, double *out_resids, double *out_weights,double (* PsiFn)(double, double, int), 
double psi_k,int max_iter, int initialized){ rlm_wfit_anova_given_probe_effects_engine(y, y_rows, y_cols, scale, probe_effects, w, out_beta, out_resids, out_weights, PsiFn, psi_k, max_iter, initialized); } preprocessCore/src/rlm_se.c0000644000126300012640000003173212127220006017345 0ustar00biocbuildphs_compbio/********************************************************************* ** ** file: rlm_se.c ** ** Aim: implement computation of standard errors for robust linear models. ** ** Copyright (C) 2003-2004 Ben Bolstad ** ** created by: B. M. Bolstad ** ** created on: Jan 27, 2003 ** ** Last modified: Feb 11, 2003 ** ** We provide implemementations of all three methods of computing standard errors ** given in Huber (1981) Robust Statistic. Wiley. In particular equations ** (6.5), (6.6) and (6.7). Finally we will implement the standard errors in terms ** of the standard errors from a weighted linear regression. Note that Huber strongly advises ** against the use of this last method as it is "not robust in general". We provide it to agree ** with previous implementations in R code. ** ** In particular we implement functions ** RLM_SE_Method_1 (6.5) ** RLM_SE_Method_2 (6.6) ** RLM_SE_Method_3 (6.7) ** RLM_SE_Method_4 (weighted regression method) ** ** ** History ** ** Jan 27, 2003 - Initial version using weighted least squares type se ** Jan 28, 2003 - More work, try to get huber recommendations working. ** Realize that the W matrix is sometimes not of full ** rank. to get around this problem recommend using ** generalized inverse (compute using svd). <-- TO DO ** Add in better checking to the inversion routines (Choleski). ** Jan 31, 2003 - make Choleski inverse take parameter to return only ** upper triangle of inverse matrix ** Actually make se routines check that nothing bad happens ** when taking the inverse. ** Feb 02, 2003 - Test if choleski failed, if did use an svd to compute ** a generalize invese and use this instead. 
** code for testing failure to se.type 2,3 added ** SVD approach to inverses added and tested. ** Feb 08, 2003 - Move a code block which will improve efficiency for ** se.type = 4 ** TO BE DONE: replace linpack routines with Lapack routines to further improve speed ** Feb 10, 2003 - Change sumpsi, sumpsi2 to user option 2 (the actual psi function) in psi_huber. ** this will fix a bug in se.type=1,2,3. we would agree completely ** with summary.rlm in R except that we estimate the scale parameter using ** residuals from the final fit and R uses a scale from one step back. ** A mechanism has been added to switch between LAPACK and LINPACK with the default ** being LAPACK. ** Feb 11, 2003 - Make LINPACK DEFAULT, comment out LAPACK in chol, svd routines, solves linking ** problems on windows, will get fixed later ** Feb 24, 2003 - comment out a few declared but unused variables, some of these might be used later ** Mar 28, 2003 - uncomment LAPACK code. LAPACK will be default using R 1.7.0 and later, which ** will be a requirement for AffyExtensions 0.5-1 and later ** Jun 11, 2003 - Modify Standard error routines to handle different psi_fns. ** Jul 23, 2003 - remove the warning about moduleCdynload by including appropriate header file ** Sep 06, 2003 - now we return the whole variance covariance matrix and residual SE with appropriate ** DF ** Sep 07, 2003 - variance matrix from method == 4 now returned ** Sep 08, 2003 - variance matrix from method == 1, 2, 3 returned ** Sep 13, 2003 - copy only upper triangle of variance matrix into output. ** Also if the variance matrix is the NULL pointer don't store anything ** at all. 
** Jan 17, 2004 - tweak how kappa is estimated so it works better ** in with non - huber psis ** June 22, 2004 - moved some functions to matrix_functions.c ** March 1, 2006 - change all comments to ansi style ** ********************************************************************/ #include "rlm.h" #include "rlm_se.h" #include "psi_fns.h" #include "matrix_functions.h" #include #include #include #include #include #include #include #include /************************************************************************* ** ** void RLM_SE_Method_1(double residvar, double *XTX, int p, double *se_estimates) ** ** double residvar - residual variance estimate ** double *XTX - t(Design matrix)%*% Design Matrix ** double p - number of parameters ** double *se_estimates - on output contains standard error estimates for each of ** the parametes ** ** this function computes the parameter standard errors using the first ** method described in Huber (1981) ** ** ie k^2 (sum psi^2/(n-p))/(sum psi'/n)^2 *(XtX)^(-1) ** ** ************************************************************************/ static void RLM_SE_Method_1(double residvar, double *XTX, int p, double *se_estimates,double *varcov){ int i,j; double *XTXinv = Calloc(p*p,double); double *work = Calloc(p*p,double); if (!Choleski_inverse(XTX,XTXinv,work,p,1)){ for (i =0; i < p; i++){ se_estimates[i] = sqrt(residvar*XTXinv[i*p + i]); } } else { printf("Singular matrix in SE inverse calculation"); } if (varcov != NULL) for (i =0; i < p; i++){ for (j = i; j < p; j++){ varcov[j*p +i]= residvar*XTXinv[j*p +i]; } } Free(work); Free(XTXinv); } /************************************************************************* ** ** void RLM_SE_Method_2(double residvar, double *W, int p, double *se_estimates) ** ** double residvar - residual variance estimate ** double *XTX - t(Design matrix)%*% Design Matrix ** double p - number of parameters ** double *se_estimates - on output contains standard error estimates for each of ** the parametes ** ** 
this function computes the parameter standard errors using the second ** method described in Huber (1981) ** ** ie K*(sum psi^2/(n-p))/(sum psi'/n) *(W)^(-1) ** ** ************************************************************************/ static void RLM_SE_Method_2(double residvar, double *W, int p, double *se_estimates,double *varcov){ int i,j; /* l,k; */ double *Winv = Calloc(p*p,double); double *work = Calloc(p*p,double); if (!Choleski_inverse(W,Winv,work,p,1)){ for (i =0; i < p; i++){ se_estimates[i] = sqrt(residvar*Winv[i*p + i]); /* printf("%f ", se_estimates[i]); */ } } else { /* printf("Using a G-inverse\n"); */ SVD_inverse(W, Winv,p); for (i =0; i < p; i++){ se_estimates[i] = sqrt(residvar*Winv[i*p + i]); /* printf("%f ", se_estimates[i]); */ } } if (varcov != NULL) for (i =0; i < p; i++){ for (j = i; j < p; j++){ varcov[j*p +i]= residvar*Winv[j*p +i]; } } Free(work); Free(Winv); } /************************************************************************* ** ** void RLM_SE_Method_3(double residvar, double *XTX, double *W, int p, double *se_estimates) ** ** double residvar - residual variance estimate ** double *XTX - t(Design matrix)%*% Design Matrix ** double p - number of parameters ** double *se_estimates - on output contains standard error estimates for each of ** the parametes ** ** this function computes the parameter standard errors using the third ** method described in Huber (1981) ** ** ie 1/(K)*(sum psi^2/(n-p))*(W)^(-1)(XtX)W^(-1) ** ** ************************************************************************/ static int RLM_SE_Method_3(double residvar, double *XTX, double *W, int p, double *se_estimates,double *varcov){ int i,j,k; /* l; */ int rv; double *Winv = Calloc(p*p,double); double *work = Calloc(p*p,double); /***************** double *Wcopy = Calloc(p*p,double); for (i=0; i ** Implementation dates: 2002-2008 ** ** Background: this file is named rma_background4.c reflecting its history ** but named distinctly, since it's history is 
somewhat muddled, and it no longer ** provides the full functionality of earlier versions. Instead it stabilizes ** on the most recent implementations. ** ** A brief history: ** rma_background.c was originally in AffyExtensions and had a method that used both PM and MM ** probes. It used the R function density() to compute the density estimator ** Eventually this morphed into rma_background2.c which was in the affy package. It added an additonal ** background implementation which used PM probes only. This became the default implementation and it used ** the R density() function. ** Later rma_background3.c was created for RMAExpress. It did not use the R density() function. Instead, it ** used code from weightedkerneldensity.c ** This file (rma_background4.c) aims to implement the standard RMA background correction method ** using code from weightedkerneldensity.c (ie removing the dependence on R density() function) ** ** History ** Mar 15, 2008 - Initial version of rma_background4.c ** Mar 16, 2008 - ** Jun 4, 2008 - fix bug with R interface, was not correctly returning value when copy ==TRUE ** Dec 1, 2010 - change how PTHREAD_STACK_MIN is used ** ** *****************************************************************************/ #include #include #include #include #include #include #include #include "weightedkerneldensity.h" #include "rma_background4.h" #ifdef USE_PTHREADS #include #include #include #define THREADS_ENV_VAR "R_THREADS" pthread_mutex_t mutex_R; struct loop_data{ double *data; int rows; int cols; int start_col; int end_col; }; #endif /*********************************************************** ** ** double find_max(double *x, int length) ** ** this function returns the max of x ** ************************************************************/ static double find_max(double *x,int length){ int i; double max; max = x[0]; for (i=1; i < length; i++){ if (x[i] > max){ max = x[i]; } } return max; } 
/************************************************************************************** ** ** double max_density(double *z,int rows,int cols,int column) ** ** double *z - matrix of dimension rows*cols ** int cols - matrix dimension ** int rows - matrix dimension ** int column - column of interest ** *************************************************************************************/ static double max_density(double *z,int rows,int cols,int column){ int i; double *x; double *dens_x; double *dens_y; double max_y,max_x; int npts = 16384; dens_x = Calloc(npts,double); dens_y = Calloc(npts,double); // KernelDensity(double *x, int *nxxx, double *weights, double *output, double *xords, int *nout) x = Calloc(rows,double); for (i=0; i< rows; i++){ x[i] = z[column*rows +i]; } KernelDensity_lowmem(x,&rows,dens_y,dens_x,&npts); max_y = find_max(dens_y,16384); i = 0; do { if (dens_y[i] == max_y) break; i++; } while(1); max_x = dens_x[i]; Free(dens_x); Free(dens_y); Free(x); return max_x; } /*************************************************************** ** ** double get_sd(double *MM, double MMmax, int rows, int cols, int column) ** ** double *PM - pm matrix ** double PMmax - value of mode of PMs for column ** int rows,cols - dimensions of matrix ** int column - column (chip) of interest ** ** estimate the sigma parameter given vector MM value of maximum of density ** of MM, dimensions of MM matrix and column of interest ** ** ***************************************************************/ static double get_sd(double *PM, double PMmax, int rows, int cols, int column){ double sigma; double tmpsum = 0.0; int numtop=0; int i; for (i=0; i < rows; i++){ if (PM[column*rows + i] < PMmax){ tmpsum = tmpsum + (PM[column*rows + i] - PMmax)*(PM[column*rows + i] - PMmax); numtop++; } } sigma = sqrt(tmpsum/(numtop -1))*sqrt(2.0)/0.85; return sigma; } /*************************************************************** ** ** double get_alpha(double *PM,double PMmax, int rows,int cols,int 
column) ** ** estimate the alpha parameter given vector PM value of maximum of density ** of PM, dimensions of MM matrix and column of interest using method proposed ** in affy2 ** ** ***************************************************************/ static double get_alpha(double *PM, double PMmax, int length){ double alpha; int i; for (i=0; i < length; i++){ PM[i] = PM[i] - PMmax; } alpha = max_density(PM,length, 1,0); alpha = 1.0/alpha; return alpha ; } /******************************************************************************** ** ** void rma_bg_parameters(double *PM,double *MM, double *param, int rows, int cols, int column,SEXP fn,SEXP rho) ** ** estimate the parameters for the background, they will be returned in *param ** param[0] is alpha, param[1] is mu, param[2] is sigma. ** ** parameter estimates are same as those given by affy in bg.correct.rma (Version 1.1 release of affy) ** *******************************************************************************/ void rma_bg_parameters(double *PM,double *param, int rows, int cols, int column){ int i = 0; double PMmax; double sd,alpha; int n_less=0,n_more=0; double *tmp_less = (double *)Calloc(rows,double); double *tmp_more = (double *)Calloc(rows,double); PMmax = max_density(PM,rows, cols, column); for (i=0; i < rows; i++){ if (PM[column*rows +i] < PMmax){ tmp_less[n_less] = PM[column*rows +i]; n_less++; } } PMmax = max_density(tmp_less,n_less,1,0); sd = get_sd(PM,PMmax,rows,cols,column)*0.85; for (i=0; i < rows; i++){ if (PM[column*rows +i] > PMmax) { tmp_more[n_more] = PM[column*rows +i]; n_more++; } } /* the 0.85 is to fix up constant in above */ alpha = get_alpha(tmp_more,PMmax,n_more); param[0] = alpha; param[1] = PMmax; param[2] = sd; Free(tmp_less); Free(tmp_more); } /********************************************************************************** ** ** double Phi(double x) ** ** Compute the standard normal distribution function ** 
*********************************************************************************/ static double Phi(double x){ return pnorm5(x,0.0,1.0,1,0); } /*********************************************************************************** ** ** double phi(double x) ** ** compute the standard normal density. ** ** **********************************************************************************/ static double phi(double x){ return dnorm4(x,0.0,1.0,0); } /************************************************************************************ ** ** void bg_adjust(double *PM,double *MM, double *param, int rows, int cols, int column) ** ** double *PM - PM matrix of dimension rows by cols ** double *param - background model parameters ** int rows, cols - dimension of matrix ** int column - which column to adjust ** ** note we will assume that param[0] is alpha, param[1] is mu, param[2] is sigma ** ***********************************************************************************/ void rma_bg_adjust(double *PM, double *param, int rows, int cols, int column){ int i; double a; for (i=0; i < rows; i++){ a = PM[column*rows + i] - param[1] - param[0]*param[2]*param[2]; PM[column*rows + i] = a + param[2] * phi(a/param[2])/Phi(a/param[2]); } } #ifdef USE_PTHREADS void *rma_bg_correct_group(void *data){ int j; double param[3]; struct loop_data *args = (struct loop_data *) data; for (j=args->start_col; j <= args->end_col; j++){ rma_bg_parameters(args->data, param, args->rows, args->cols, j); rma_bg_adjust(args->data, param, args->rows, args->cols, j); } } #endif /************************************************************************************ ** ** void rma_bg_correct(double *PM,double *MM, double *param, int rows, int cols, int column) ** ** double *PM - PM matrix of dimension rows by cols ** int rows - dimensions of the matrix ** int cols - dimensions of the matrix ** ** rma background correct the columns of a supplied matrix ** ** 
************************************************************************************/ void rma_bg_correct(double *PM, int rows, int cols){ int j; double param[3]; #ifdef USE_PTHREADS int i; int t, returnCode, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; char *nthreads; pthread_attr_t attr; pthread_t *threads; struct loop_data *args; void *status; #ifdef PTHREAD_STACK_MIN size_t stacksize = PTHREAD_STACK_MIN + 0x4000; #else size_t stacksize = 0x8000; #endif #endif #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of columns to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of columns is less than the number of threads */ if (num_threads < cols){ chunk_size = cols/num_threads; chunk_size_d = ((double) cols)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } args = (struct loop_data *) Calloc((cols < num_threads ? 
cols : num_threads), struct loop_data); args[0].data = PM; args[0].rows = rows; args[0].cols = cols; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < cols; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data)); } args[t].start_col = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].end_col = i+chunk_size; i++; } else{ args[t].end_col = i+chunk_size-1; } t++; } /* Determining the quantile normalization target distribution */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, rma_bg_correct_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); Free(threads); Free(args); #else for (j=0; j < cols; j++){ rma_bg_parameters(PM, param,rows,cols,j); rma_bg_adjust(PM,param,rows,cols,j); } #endif } /************************************************************************************ ** ** SEXP R_rma_bg_correct(SEXP PMmat, SEXP MMmat, SEXP densfunc, SEXP rho) ** ** given R matricies PMmat and MMmat background correct the columns of PMmat ** ** SEXP PMmat - matrix of PM's ** ** this function can be dangerous since it changes PM values on the input matrix (ie it makes no copies) ** ***********************************************************************************/ SEXP R_rma_bg_correct(SEXP PMmat,SEXP copy){ SEXP dim1,PMcopy; int j; int rows; int cols; double *PM; 
PROTECT(dim1 = getAttrib(PMmat,R_DimSymbol)); rows = INTEGER(dim1)[0]; cols = INTEGER(dim1)[1]; if (asInteger(copy)){ PROTECT(PMcopy = allocMatrix(REALSXP,rows,cols)); copyMatrix(PMcopy,PMmat,0); PM = NUMERIC_POINTER(AS_NUMERIC(PMcopy)); } else { PM = NUMERIC_POINTER(AS_NUMERIC(PMmat)); } rma_bg_correct(PM, rows, cols); if (asInteger(copy)){ UNPROTECT(2); } else { UNPROTECT(1); } if (asInteger(copy)){ return PMcopy; } else { return PMmat; } } preprocessCore/src/rma_background4.h0000644000126300012640000000050112127220006021121 0ustar00biocbuildphs_compbio#ifndef RMA_BACKGROUND4_H #define RMA_BACKGROUND4_H void rma_bg_parameters(double *PM,double *param, int rows, int cols, int column); void rma_bg_adjust(double *PM, double *param, int rows, int cols, int column); void rma_bg_correct(double *PM, int rows, int cols); SEXP R_rma_bg_correct(SEXP PMmat,SEXP copy); #endif preprocessCore/src/rma_common.c0000644000126300012640000000566212127220006020216 0ustar00biocbuildphs_compbio/*********************************************************************** ** ** file: rma_common.c ** ** aim: a location for commonly used utility functions ** ** ** written by: B. M. Bolstad ** ** created: Oct 16, 2002 ** last modified: Oct 16, 2002 ** ** history: ** Oct 16, 2002 - a place to put common utility code, created to help ** the R package build. ** Jan 2, 2003 - Clean up code comments ** Nov 13, 2006 - moved median function into this file from rma2.c ** ***********************************************************************/ #include "rma_common.h" #include #include #include #include #include #include /********************************************************** ** ** int sort_double(const void *a1,const void *a2) ** ** a comparison function used when sorting doubles. 
** **********************************************************/ int sort_double(const double *a1,const double *a2){ if (*a1 < *a2) return (-1); if (*a1 > *a2) return (1); return 0; } /************************************************************************** ** ** double median(double *x, int length) ** ** double *x - vector ** int length - length of *x ** ** returns the median of *x ** *************************************************************************/ double median(double *x, int length){ /* int i; */ int half; double med; double *buffer = Calloc(length,double); memcpy(buffer,x,length*sizeof(double)); half = (length + 1)/2; /* qsort(buffer,length,sizeof(double), (int(*)(const void*, const void*))sort_double); if (length % 2 == 1){ med = buffer[half - 1]; } else { med = (buffer[half] + buffer[half-1])/2.0; } */ rPsort(buffer, length, half-1); med = buffer[half-1]; if (length % 2 == 0){ rPsort(buffer, length, half); med = (med + buffer[half])/2.0; } Free(buffer); return med; } /************************************************************************** ** ** double median_nocopy(double *x, int length) ** ** double *x - vector ** int length - length of *x ** ** returns the median of *x. note x is not order preserved when this function ** is called. 
** *************************************************************************/ double median_nocopy(double *x, int length){ /* int i; */ int half; double med; double *buffer = x; //Calloc(length,double); half = (length + 1)/2; /* qsort(buffer,length,sizeof(double), (int(*)(const void*, const void*))sort_double); if (length % 2 == 1){ med = buffer[half - 1]; } else { med = (buffer[half] + buffer[half-1])/2.0; } */ rPsort(buffer, length, half-1); med = buffer[half-1]; if (length % 2 == 0){ rPsort(buffer, length, half); med = (med + buffer[half])/2.0; } return med; } preprocessCore/src/rma_common.h0000644000126300012640000000027412127220006020215 0ustar00biocbuildphs_compbio#ifndef RMA_COMMON #define RMA_COMMON 1 int sort_double(const double *a1,const double *a2); double median(double *x, int length); double median_nocopy(double *x, int length); #endif preprocessCore/src/weightedkerneldensity.c0000644000126300012640000005247212127220006022471 0ustar00biocbuildphs_compbio/***************************************************************************** ** ** file: weightedkerneldensity.c ** ** aim : compute weighted kernel density estimates ** ** Copyright (C) 2003-2008 Ben Bolstad ** ** created on: Mar 24, 2003 ** ** Description ** ** the aim here is to implement kernel density estimators, with the option to ** weight each observation. For speed we will use the FFT to convolve a weighted ** histogram with a kernel. ** ** ** History ** ** Mar 9, 2003 - Initial version ** Mar 10, 2003 - Add in FFT framework, Push into AffyExtensions ** Mar 11, 2003 - Add ability to do kernel density with arbitrary weights ** Apr 22, 2003 - fix computation of bandwidth. Add in linear interpolation ** so as to be more consistent with R. ** Apr 5, 2004 - all calloc/free are now Calloc/Free ** Mar 24, 2005 - Add in IQR function to handle obscure cases. ** Mar 15, 2008 - add KernelDensity_lowmem. 
weightedkerneldensity.c is ported from affyPLM to preprocessCore ** Oct 31, 2011 - Add additional kernels. Allow non-power of 2 nout in KernelDensity. Fix error in bandwidth calculation ** ****************************************************************************/ #include #include #include #include #include #include /* For memcpy */ #include "rma_common.h" #include "weightedkerneldensity.h" /***************************************************************************** ** ** void weighted_massdist(double *x, int nx, double *w, double *xlow, double *xhigh, double *y, int *ny) ** ** see AS R50 and AS 176 (AS = Applied Statistics) ** ** idea is to discretize the data, but have modified algorithm to put weights on each observation ** ** double *x - the data ** int nx - length of x ** double *w - weight for each one of x, vector should also be of length nx ** double *xlow - minimum value in x dimension ** double *xhigh - maximum value in x dimension ** double *y - on output will contain discretation scheme of data ** int ny - length of y ** ****************************************************************************/ static void weighted_massdist(double *x, int *nx, double *w, double *xlow, double *xhigh, double *y, int *ny){ double fx, xdelta, xmass, xpos; int i, ix, ixmax, ixmin; ixmin = 0; ixmax = *ny - 2; xmass = 0.0; xdelta = (*xhigh - *xlow) / (*ny - 1); for(i=0; i < *ny ; i++){ y[i] = 0.0; } for (i=0; i < *nx; i++){ xmass += w[i]; } xmass = 1.0/xmass; /* Rprintf("%f\n",xmass);*/ for(i=0; i < *nx ; i++) { if(R_FINITE(x[i])) { xpos = (x[i] - *xlow) / xdelta; ix = floor(xpos); fx = xpos - ix; if(ixmin <= ix && ix <= ixmax) { y[ix] += w[i]*(1 - fx); y[ix + 1] += w[i]*fx; } else if(ix == -1) { y[0] += w[i]*fx; } else if(ix == ixmax + 1) { y[ix] += w[i]*(1 - fx); } } } for(i=0; i < *ny; i++) y[i] *= xmass; } /***************************************************************************** ** ** void unweighted_massdist(double *x, int nx, double *xlow, double *xhigh, 
double *y, int *ny)
**
** see AS R50 and AS 176 (AS = Applied Statistics)
**
** idea is to discretize the data, does not put weights on each observation
**
** double *x - the data
** int nx - length of x
** double *xlow - minimum value in x dimension
** double *xhigh - maximum value in x dimension
** double *y - on output will contain discretization scheme of data
** int ny - length of y
**
****************************************************************************/

static void unweighted_massdist(double *x, int *nx, double *xlow, double *xhigh, double *y, int *ny){

  double fx, xdelta, xpos;
  int i, ix, ixmax, ixmin;

  ixmin = 0;
  ixmax = *ny - 2;
  xdelta = (*xhigh - *xlow) / (*ny - 1);   /* grid spacing */

  /* zero the output histogram */
  for(i=0; i < *ny ; i++){
    y[i] = 0.0;
  }

  /* linear binning: each finite observation splits its unit mass between
  ** the two grid points that bracket it, proportionally to proximity;
  ** observations just outside the grid contribute to the edge bin only */
  for(i=0; i < *nx ; i++) {
    if(R_FINITE(x[i])) {
      xpos = (x[i] - *xlow) / xdelta;
      ix = (int)floor(xpos);
      fx = xpos - ix;
      if(ixmin <= ix && ix <= ixmax) {
        y[ix] += (1 - fx);
        y[ix + 1] +=  fx;
      } else if(ix == -1) {
        y[0] += fx;
      } else if(ix == ixmax + 1) {
        y[ix] += (1 - fx);
      }
    }
  }

  /* normalise so the bin masses sum to (at most) one */
  for(i=0; i < *ny; i++)
    y[i] *= (1.0/(double)(*nx));
}

/*********************************************************************
**
** void twiddle(int N, int i, double *tf_real, double *tf_imag)
**
** int N - length of data series
** int i - index of the twiddle factor
** double *tf_real - on output contains real part of twiddle factor
** double *tf_imag - on output contains imaginary part of twiddle factor
**
** twiddle factor in FFT: exp(-2*pi*i*sqrt(-1)/N) for the forward transform
**
********************************************************************/

static void twiddle(int N, int i,double *tf_real, double *tf_imag){
  double pi = 3.14159265358979323846;

  if (i ==0){
    *tf_real = 1;
    *tf_imag = 0;
  } else {
    *tf_real = cos(2*pi*(double)i/(double)N);
    *tf_imag = -sin(2*pi*(double)i/(double)N);
  }
}

/*********************************************************************
**
** void twiddle2(int N, int i, double *tf_real, double *tf_imag)
**
** int N - length of data series
** int i - index of the twiddle factor
** double *tf_real - on output contains real part of twiddle factor
** double *tf_imag - on output contains imaginary part of twiddle factor
**
** twiddle factor in FFT when computing inverse FFT (conjugate of twiddle():
** the sine term carries the opposite sign)
**
********************************************************************/

static void twiddle2(int N, int i,double *tf_real, double *tf_imag){
  double pi = 3.14159265358979323846;

  if (i ==0){
    *tf_real = 1;
    *tf_imag = 0;
  } else {
    *tf_real = cos(2*pi*(double)i/(double)N);
    *tf_imag = sin(2*pi*(double)i/(double)N);
  }
}

/*********************************************************************
**
** void fft_dif(double *f_real, double *f_imag, int p){
**
** compute the FFT using Decimation In Frequency of a data sequence of length 2^p
**
** double *f_real - real component of data series
** double *f_imag - imaginary component of data series
** int p - where 2^p is length of data series
**
** computes the FFT in place, result is in reverse bit order.
**
********************************************************************/

static void fft_dif(double *f_real, double *f_imag, int p){
  int BaseE, BaseO, i, j, k, Blocks, Points, Points2;
  double even_real, even_imag, odd_real, odd_imag;
  double tf_real, tf_imag;

  Blocks = 1;
  Points = 1 << p;

  /* p stages; each stage halves the butterfly span and doubles the block count */
  for (i=0; i < p; i++){
    Points2 = Points >> 1;
    BaseE = 0;
    for (j=0; j < Blocks; j++){
      BaseO = BaseE + Points2;
      for (k =0; k < Points2; k++){
        /* DIF butterfly: sum goes straight through, difference is twiddled */
        even_real = f_real[BaseE + k] + f_real[BaseO + k];
        even_imag = f_imag[BaseE + k] + f_imag[BaseO + k];
        twiddle(Points,k,&tf_real, &tf_imag);
        odd_real = (f_real[BaseE+k]-f_real[BaseO+k])*tf_real - (f_imag[BaseE+k]-f_imag[BaseO+k])*tf_imag;
        odd_imag = (f_real[BaseE+k]-f_real[BaseO+k])*tf_imag + (f_imag[BaseE+k]-f_imag[BaseO+k])*tf_real;
        f_real[BaseE+k] = even_real;
        f_imag[BaseE+k] = even_imag;
        f_real[BaseO+k] = odd_real;
        f_imag[BaseO+k] = odd_imag;
      }
      BaseE = BaseE + Points;
    }
    Blocks = Blocks << 1;
    Points = Points >> 1;
  }
}

/*********************************************************************
**
** void fft_ditI(double *f_real, 
double *f_imag, int p){ ** ** compute the IFFT using Decimation In time of a data sequence of length 2^p ** ** double *f_real - real component of data series ** double *f_imag - imaginary component of data series ** int p - where 2^p is length of data series ** ** computes the IFFT in place, where input is in reverse bit order. ** output is in normal order. ** ********************************************************************/ static void fft_ditI(double *f_real, double *f_imag, int p){ int i,j,k, Blocks, Points, Points2, BaseB, BaseT; double top_real, top_imag, bot_real, bot_imag, tf_real, tf_imag; Blocks = 1 << (p-1); Points = 2; for (i=0; i < p; i++){ Points2 = Points >> 1; BaseT = 0; for (j=0; j < Blocks; j++){ BaseB = BaseT+Points2; for (k=0; k < Points2; k++){ top_real = f_real[BaseT+k]; top_imag = f_imag[BaseT+k]; twiddle2(Points,k,&tf_real, &tf_imag); bot_real = f_real[BaseB+k]*tf_real - f_imag[BaseB+k]*tf_imag; bot_imag = f_real[BaseB+k]*tf_imag + f_imag[BaseB+k]*tf_real; f_real[BaseT+k] = top_real + bot_real; f_imag[BaseT+k] = top_imag + bot_imag; f_real[BaseB+k] = top_real - bot_real; f_imag[BaseB+k] = top_imag - bot_imag; } BaseT= BaseT + Points; } Blocks = Blocks >> 1; Points = Points << 1; } } /******************************************************************* ** ** static void fft_density_convolve(double *y, double *kords, int n) ** ** double *y - ve ** double *kords - ** int n - ** ** ******************************************************************/ static void fft_density_convolve(double *y, double *kords, int n){ int i; int nlog2 = (int)(log((double)n)/log(2.0) + 0.5); /* ugly hack to stop rounding problems */ double *y_imag = Calloc(n,double); double *kords_imag = Calloc(n,double); double *conv_real = Calloc(n,double); double *conv_imag = Calloc(n,double); /* printf("nlog2: %.30lf %d\n", log((double)n)/log(2.0),nlog2); */ fft_dif(y, y_imag, nlog2); fft_dif(kords,kords_imag,nlog2); for (i=0; i < n; i++){ conv_real[i] = y[i]*kords[i] + 
y_imag[i]*kords_imag[i]; conv_imag[i] = y[i]*(-1*kords_imag[i]) + y_imag[i]*kords[i]; } fft_ditI(conv_real, conv_imag, nlog2); for (i=0; i < n; i++){ kords[i] = conv_real[i]; } Free(conv_real); Free(conv_imag); Free(kords_imag); Free(y_imag); } /************************************************************** ** ** static void kernelize(double *data, int n, double bw, int kernel) ** ** double *data - data to kernelize ** int n - length of data. ** double bw - bandwidth for Kernel ** int kernel - an integer specifying which kernel to use ** 1 is gaussian, 2 is Epanechnikov, ** 3 is ........... ** ** ***************************************************************/ static void kernelize(double *data, int n, double bw, int kernel){ double a = 0.0; int i; double pi = 3.14159265358979323846; if (kernel == 1){ /* Gaussian Kernel */ for (i =0; i < n; i++){ data[i] = dnorm4(data[i],0.0,bw,0); } } else if (kernel == 2){ /* Epanechnikov Kernel */ a = bw * sqrt(5.0); for (i =0; i < n; i++){ if (fabs(data[i]) < a){ data[i] = 3.0/(4.0*a)*(1.0 - (fabs(data[i])/a)* (fabs(data[i])/a)); } else { data[i] = 0.0; } } } else if (kernel == 3){ /* Rectangular */ a = bw*sqrt(3.0); for (i =0; i < n; i++){ if (fabs(data[i]) < a){ data[i] = 0.5/a; } else { data[i] = 0.0; } } } else if (kernel == 4){ /* biweight */ a = bw*sqrt(7.0); for (i =0; i < n; i++){ if (fabs(data[i]) < a){ data[i] = 15.0/(16.0*a)*(1-(fabs(data[i])/a)* (fabs(data[i])/a))*(1-(fabs(data[i])/a)* (fabs(data[i])/a)); } else { data[i] = 0.0; } } } else if (kernel == 5){ /* cosine */ a = bw/sqrt(1.0/3.0 - 2/(pi*pi)); for (i =0; i < n; i++){ if (fabs(data[i]) < a){ data[i] = (1.0 + cos(pi*data[i]/a))/(2.0*a); } else { data[i] = 0.0; } } } else if (kernel == 6){ /* optcosine */ a = bw/sqrt(1.0 - 8.0/(pi*pi)); for (i =0; i < n; i++){ if (fabs(data[i]) < a){ data[i] = pi/4.0*cos(pi*data[i]/(2*a))/a; } else { data[i] = 0.0; } } } } /***************************************************************** ** ** static double compute_sd(double 
*x, int length) ** ** double *x - data vector ** int length - length of x ** ** compute the standard deviation of a data vector ** *****************************************************************/ static double compute_sd(double *x, int length){ int i; double sum=0.0,sum2=0.0; for (i = 0; i < length; i++){ sum+=x[i]; } sum = sum/(double)length; for (i=0; i < length; i++){ sum2+=(x[i]-sum)*(x[i] - sum); } return(sqrt(sum2/(double)(length-1))); } /***************************************************************** ** ** static double bandwidth_nrd0(double *x, int length, double iqr) ** ** double *x - data vector ** int length - length of x ** double iqr - IQR of *x ** ** compute the kernel bandwidth using nrd0 ** *****************************************************************/ static double bandwidth_nrd0(double *x, int length, double iqr){ double hi; double lo; hi = compute_sd(x, length); if (hi > iqr/1.34){ lo = iqr/1.34; } else { lo = hi; } if (lo == 0){ if (hi !=0){ lo = hi; } else if (fabs(x[0]) != 0){ lo = fabs(x[0]); } else { lo = 1.0; } } return (0.9*lo*pow((double)length, -0.2)); } /***************************************************************** ** ** static double bandwidth_nrd(double *x, int length, double iqr) ** ** double *x - data vector ** int length - length of x ** double iqr - IQR of *x ** ** compute the kernel bandwidth using nrd ** *****************************************************************/ static double bandwidth_nrd(double *x, int length, double iqr){ double sd; double hi = iqr/1.34; double lo; sd = compute_sd(x, length); if (sd > hi){ lo = hi; } else { lo = sd; } return(1.06*lo*pow((double)length, -0.2)); } /***************************************************************** ** ** static double bandwidth(double *x, int length, double iqr, int bw_fn) ** ** double *x - data vector ** int length - length of x ** double iqr - IQR of *x ** int bw_fn - 0 for nrd0, 1 for nrd ** ** compute the kernel bandwidth using nrd ** 
*****************************************************************/ static double bandwidth(double *x, int length, double iqr, int bw_fn){ if (bw_fn == 0){ return(bandwidth_nrd0(x, length, iqr)); } else if (bw_fn == 1){ return(bandwidth_nrd(x, length, iqr)); } } /****************************************************************** ** ** double linear_interpolate_helper(double v, double *x, double *y, int n) ** ** double v ** double *x ** double *y ** int n ** ** linearly interpolate v given x and y. ** ** **********************************************************************/ static double linear_interpolate_helper(double v, double *x, double *y, int n) { int i, j, ij; i = 0; j = n - 1; if(v < x[i]) return y[0]; if(v > x[j]) return y[n-1]; /* find the correct interval by bisection */ while(i < j - 1) { /* x[i] <= v <= x[j] */ ij = (i + j)/2; /* i+1 <= ij <= j-1 */ if(v < x[ij]) j = ij; else i = ij; /* still i < j */ } /* provably have i == j-1 */ /* interpolation */ if(v == x[j]) return y[j]; if(v == x[i]) return y[i]; /* impossible: if(x[j] == x[i]) return y[i]; */ return y[i] + (y[j] - y[i]) * ((v - x[i])/(x[j] - x[i])); } /********************************************************************* ** ** void linear_interpolate(double *x, double *y, double *xout, double *yout, int length) ** ** double *x ** double *y ** double *xout ** double *yout ** int length ** ** given x and y, interpolate linearly at xout putting the results in yout ** ** **********************************************************************/ static void linear_interpolate(double *x, double *y, double *xout, double *yout, int length, int length_out){ int i; for(i=0 ; i < length_out; i++) yout[i] = linear_interpolate_helper(xout[i], x, y, length); } static double IQR(double *x, int length); /********************************************************************** ** ** void KernelDensity(double *x, int *nxxx, double *output, double *xords, double *weights) ** ** double *x - data vector ** int *nxxx - 
length of x ** double *output - place to output density values ** double *xords - x coordinates corresponding to output ** double *weights - a weight for each item of *x should be of length *nxxx ** int *nout - length of output should be a power of two, preferably 512 or above ** ** ** **********************************************************************/ void KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj){ int nx = *nxxx; int nuser = *nout; int n; /* = *nout; */ int n2; /* == 2*n; */ int i; double low, high, iqr, bw, to, from; double *kords; /* = Calloc(2*n,double);*/ double *buffer; /* = Calloc(nx,double);*/ double *y; /* = Calloc(2*n,double);*/ double *xords; /* = Calloc(n,double);*/ int kern_fn=*kernel_fn; int bw_fn=*bandwidth_fn; double bw_adj = *bandwidth_adj; n = (int)pow(2.0,ceil(log2(nuser))); if (n < 512){ n = 512; } n2 = 2*n; kords = Calloc(n2,double); buffer = Calloc(nx,double); y = Calloc(n2,double); xords = Calloc(n,double); memcpy(buffer,x,nx*sizeof(double)); qsort(buffer,nx,sizeof(double),(int(*)(const void*, const void*))sort_double); low = buffer[0]; high = buffer[nx-1]; iqr = IQR(buffer,nx); /* buffer[(int)(0.75*nx + 0.5)] - buffer[(int)(0.25*nx+0.5)]; */ bw =bw_adj*bandwidth(x,nx,iqr,bw_fn); low = low - 7*bw; high = high + 7*bw; for (i=0; i <= n; i++){ kords[i] = (double)i/(double)(2*n -1)*2*(high - low); } for (i=n+1; i < 2*n; i++){ kords[i] = -kords[2*n - i]; } /* bw = bandwidth(x,nx,iqr); */ /* printf("iqr: %f bw: %f\n",iqr,bw); */ kernelize(kords, 2*n,bw,kern_fn); weighted_massdist(x, &nx, weights, &low, &high, y, &n); fft_density_convolve(y,kords,n2); to = high - 4*bw; /* corrections to get on correct output range */ from = low + 4* bw; for (i=0; i < n; i++){ xords[i] = (double)i/(double)(n -1)*(high - low) + low; } for (i=0; i < nuser; i++){ output_x[i] = (double)i/(double)(nuser -1)*(to - from) + from; } for (i =0; i < n; i++){ 
kords[i] = kords[i]/n2; } /* to get results that agree with R really need to do linear interpolation */ linear_interpolate(xords, kords, output_x, output, n, nuser); Free(xords); Free(y); Free(buffer); Free(kords); } /** ** ** Note the following function assumes that data (x) is sorted ** ** Aim is to duplicate R quantile function ** **/ static double IQR(double *x, int length){ double lowindex, highindex; double lowfloor, highfloor; double lowceil, highceil; int low_i, high_i; double low_h, high_h; double qslow, qshigh; lowindex = (double)(length -1)*0.25; highindex = (double)(length -1)*0.75; lowfloor = floor(lowindex); highfloor = floor(highindex); lowceil = ceil(lowindex); highceil = ceil(highindex); low_i = lowindex > lowfloor; high_i = highindex > highfloor; qslow = x[(int)lowfloor]; qshigh = x[(int)highfloor]; low_h = lowindex - lowfloor; high_h = highindex - highfloor; if (low_h > 1e-10){ qslow = (1.0 - low_h)*qslow + low_h*x[(int)lowceil]; } if (high_h > 1e-10){ qshigh = (1.0 - high_h)*qshigh + high_h*x[(int)highceil]; } return qshigh - qslow; } /********************************************************************** ** ** void KernelDensity_lowmem(double *x, int *nxxx, double *output, double *xords, double *weights) ** ** double *x - data vector (note order will be changed on output) ** int *nxxx - length of x ** double *output - place to output density values ** double *xords - x coordinates corresponding to output ** double *weights - a weight for each item of *x should be of length *nxxx ** int *nout - length of output should be a power of two, preferably 512 or above ** ** **********************************************************************/ void KernelDensity_lowmem(double *x, int *nxxx, double *output, double *output_x, int *nout){ int nx = *nxxx; int n = *nout; int n2= 2*n; int i; double low, high,iqr,bw,from,to; double *kords = Calloc(2*n,double); double *buffer = x; double *y = Calloc(2*n,double); double *xords = Calloc(n,double); 
qsort(buffer,nx,sizeof(double),(int(*)(const void*, const void*))sort_double); low = buffer[0]; high = buffer[nx-1]; iqr = IQR(buffer,nx); //buffer[(int)(0.75*nx+0.5)] - buffer[(int)(0.25*nx+0.5)]; bw = bandwidth_nrd0(x,nx,iqr); low = low - 7*bw; high = high + 7*bw; for (i=0; i <= n; i++){ kords[i] = (double)i/(double)(2*n -1)*2*(high - low); } for (i=n+1; i < 2*n; i++){ kords[i] = -kords[2*n - i]; } //bw = bandwidth(x,nx,iqr); /* printf("bw: %f\n",bw);*/ kernelize(kords, 2*n,bw,2); unweighted_massdist(x, &nx, &low, &high, y, &n); fft_density_convolve(y,kords,n2); to = high - 4*bw; /* corrections to get on correct output range */ from = low + 4* bw; for (i=0; i < n; i++){ xords[i] = (double)i/(double)(n -1)*(high - low) + low; output_x[i] = (double)i/(double)(n -1)*(to - from) + from; } for (i =0; i < n; i++){ kords[i] = kords[i]/n2; } // to get results that agree with R really need to do linear interpolation linear_interpolate(xords, kords, output_x, output, n, n); Free(xords); Free(y); Free(kords); } preprocessCore/src/weightedkerneldensity.h0000644000126300012640000000051112127220006022461 0ustar00biocbuildphs_compbio#ifndef WEIGHTEDKERNELDENSITY_H #define WEIGHTEDKERNELDENSITY_H void KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj); void KernelDensity_lowmem(double *x, int *nxxx, double *output, double *output_x, int *nout); #endif preprocessCore/tests/0000755000126300012640000000000012127132753016300 5ustar00biocbuildphs_compbiopreprocessCore/tests/PLMdtest.R0000644000126300012640000000746712127132753020135 0ustar00biocbuildphs_compbio library(preprocessCore) values <- rnorm(100) group.labels <- sample(0:4,replace=TRUE, 100) results <- double(10000) ngroups <- 2 for (i in 1:10000){ values <- rnorm(100,sd=1) values <- values/sd(values) group.labels <- sample(0:(ngroups-1),replace=TRUE, 100) blah <- .C("R_split_test",as.double(values), as.integer(100), 
as.integer(ngroups), as.integer(group.labels),double(1)) results[i] <- blah[[5]] } plot(sort(results),qchisq(0:9999/10000,ngroups-1)) lm(qchisq(0:9999/10000,ngroups-1) ~ sort(results)) boxplot(values ~ group.labels,ylim=c(-2,2)) sc <- median(abs(resid(lm(values ~ 1))))/0.6745 sum((resid(lm(values ~ 1))/sc)^2)/2 sum((resid(lm(values ~ as.factor(group.labels)))/sc)^2)/2 values <- rnorm(100) group.labels <- sample(0:4,replace=TRUE, 100) values[group.labels == 1] <- values[group.labels == 1] + 0.4 blah <- .C("R_split_test",as.double(values), as.integer(100), as.integer(5), as.integer(group.labels),double(1)) boxplot(values ~ group.labels,ylim=c(-2,2)) library(preprocessCore) .C("R_test_get_design_matrix",as.integer(4),as.integer(5)) chips <- as.factor(rep(c(1,2,3,4,5,6),c(5,5,5,5,5,5))) probes <- rep(c(1,3,4,5,6),6) probes[c(1,6,11)] <- 2 ##probes[24 + c(8,16,24)] <- 10 probes <- as.factor(probes) model.matrix(~ -1 + probes)%*%contr.sum(6) probes <- rep(c(1,3,4,5,6),6) probes[c(1,6,11)] <- 2 probes[c(20,25,30)] <- 7 probes <- as.factor(probes) model.matrix(~ -1 + probes)%*%contr.sum(7) probes <- rep(c(1,3,4,5,6),6) probes[c(1,6,11)] <- 2 probes[c(5,10,15)] <- 7 probes <- as.factor(probes) model.matrix(~ -1 + probes)%*%contr.sum(7) probes <- rep(c(1,3,4,5,6),6) probes[c(1,6,11)] <- 2 probes[1+c(1,6,11)] <- 8 probes[2+c(1,6,11)] <- 9 probes[3+c(1,6,11)] <- 10 probes[c(5,10,15)] <- 7 probes <- as.factor(probes) model.matrix(~ -1 + probes)%*%contr.sum(10) true.probes <- c(4,3,2,1,-1,-2,-3,-4) true.chips <- c(8,9,10,11,12,13) y <- outer(true.probes,true.chips,"+") estimate.coefficients <- function(y){ colmean <- apply(y,2,mean) y <- sweep(y,2,FUN="-",colmean) rowmean <- apply(y,1,mean) y <- sweep(y,1,FUN="-",rowmean) list(y,colmean,rowmean) } estimate.coefficients(y) y <- outer(true.probes,true.chips,"+") estimate.coefficients(y) y2 <- sweep(y,2,FUN="-",apply(y,2,mean)) c(3.875, 2.875, 1.875, 0.875, -1.125, -2.125, -3.125, -4, -2.25) cp <- rep(c(1,2,3,4,5,6),rep(8,6)) pr <- 
rep(c(1,2,3,4,5,6,7,8),6) pr[c(32,40,48)] <- 9 true.probes <- c(4,3,2,1,-1,-2,-3,-4) true.chips <- c(8,9,10,11,12,10) y <- outer(true.probes,true.chips,"+") + rnorm(48,0,0.1) y[8,4:6] <- c(11,12,10)+2 + rnorm(3,0,0.1) lm(as.vector(y) ~ -1 + as.factor(cp) + C(as.factor(pr),"contr.sum")) matplot(y,type="l") matplot(matrix(fitted( lm(as.vector(y) ~ -1 + as.factor(cp) + C(as.factor(pr),"contr.sum"))),ncol=6),type="l") library(preprocessCore) true.probes <- c(4,3,2,1,-1,-2,-3,-4) true.chips <- c(8,9,10,11,12,10) y <- outer(true.probes,true.chips,"+") + rnorm(48,0,0.25) y[8,4:6] <- c(11,12,10)+ 2.5 + rnorm(3,0,0.25) y[5,4:6] <- c(11,12,10)+-2.5 + rnorm(3,0,0.25) ###.C("plmd_fit_R", as.double(y), as.integer(8), as.integer(6), ### as.integer(2), as.integer(c(1,1,1,2,2,2) - 1), ### double(6 +2*8), ### double(48), ### double(48)) ###matplot(matrix(.C("plmd_fit_R", as.double(y), as.integer(8), as.integer(6), ### as.integer(2), as.integer(c(1,1,1,2,2,2) - 1), ### double(6 +2*8), ### double(48), ### double(48))[[7]],ncol=6)) ### ##.Call("R_plmd_model",y,0,1.3345,as.integer(c(1,1,1,2,2,2) - 1),as.integer(2)) rcModelPLM(y) rcModelPLMd(y,c(1,1,1,2,2,2)) ###R_plmd_model(SEXP Y, SEXP PsiCode, SEXP PsiK, SEXP Groups, SEXP Ngroups) pr[seq(3,48,8)][1:3] <- 10 y[seq(3,48,8)][1:3] <- c(8,9,10) -3 + rnorm(3,0,0.1) lm(as.vector(y) ~ -1 + as.factor(cp) + C(as.factor(pr),"contr.sum"))