affyio/DESCRIPTION0000644000175400017540000000132313556146041014567 0ustar00biocbuildbiocbuildPackage: affyio Version: 1.56.0 Title: Tools for parsing Affymetrix data files Author: Ben Bolstad Maintainer: Ben Bolstad Depends: R (>= 2.6.0) Imports: zlibbioc, methods Description: Routines for parsing Affymetrix data files based upon file format information. Primary focus is on accessing the CEL and CDF file formats. License: LGPL (>= 2) URL: https://github.com/bmbolstad/affyio biocViews: Microarray, DataImport, Infrastructure LazyLoad: yes git_url: https://git.bioconductor.org/packages/affyio git_branch: RELEASE_3_10 git_last_commit: fd0e865 git_last_commit_date: 2019-10-29 Date/Publication: 2019-10-29 NeedsCompilation: yes Packaged: 2019-10-29 23:32:49 UTC; biocbuild affyio/HISTORY0000644000175400017540000000036213556116171014150 0ustar00biocbuildbiocbuildNov 29, 2005 - Initial importation of existing C source code from the affy and makecdfenv packages. Dec 1, 2005 - comment cleaning in C source code. Add read.cdffile.list.R, check.cdf.type.R July 23, 2015 - Added function get.celfile.datesaffyio/NAMESPACE0000644000175400017540000000013213556116171014276 0ustar00biocbuildbiocbuildexportPattern("^[^\\.]") useDynLib("affyio") import(zlibbioc) importFrom("methods", "is") affyio/R/0000755000175400017540000000000013556116171013264 5ustar00biocbuildbiocbuildaffyio/R/check.cdf.type.R0000644000175400017540000000062313556116171016200 0ustar00biocbuildbiocbuild### ### File: check.cdf.type.R ### ### Aim: return a string giving the file format. Either text, xda or unknown ### in the case that file format is not known. 
### check.cdf.type <- function(filename){ if (.Call("CheckCDFtext",filename,PACKAGE="affyio")){ return("text") } else if (.Call("CheckCDFXDA",filename,PACKAGE="affyio")){ return("xda") } else { return("unknown") } } affyio/R/get.celfile.dates.R0000644000175400017540000000102513556116171016665 0ustar00biocbuildbiocbuildget.celfile.dates <- function(filenames,...){ chardates<-vector("character",length(filenames)) for(i in seq(along=chardates)){ tmp<-read.celfile.header(filenames[i],info="full") chardates[i]<-strsplit(tmp$ScanDate,"T|\ ")[[1]][1] } dates<-as.Date(rep(NA,length(chardates))) ind <- grep("-",chardates) if(length(ind)>0) dates[ind]<-as.Date(chardates[ind],"%Y-%m-%d") ind <- grep("/",chardates) if(length(ind)>0) dates[ind]<-as.Date(chardates[ind],"%m/%d/%y") return(dates) } affyio/R/read.cdffile.list.R0000644000175400017540000000132313556116171016666 0ustar00biocbuildbiocbuild### ### File: read.cdffile.list.R ### ### Aim: reads full CDF file into R list structure. ### ### History ### Dec 1, 2005 - Initial version ### read.cdffile.list <- function (filename, cdf.path = getwd()){ cdf.type <- check.cdf.type(file.path(path.expand(cdf.path),filename)) if (cdf.type == "xda"){ .Call("ReadCDFFileIntoRList", file.path(path.expand(cdf.path), filename), TRUE, PACKAGE = "affyio") } else if (cdf.type =="text"){ .Call("ReadtextCDFFileIntoRList", file.path(path.expand(cdf.path), filename), TRUE, PACKAGE = "affyio") } else { stop(paste("File format for",filename,"not recognized.")) } } affyio/R/read.celfile.R0000644000175400017540000000043713556116171015730 0ustar00biocbuildbiocbuild### ### File: read.celfile.R ### ### Aim: read entire contents of a single given specified CEL file into ### an R data structure. 
### read.celfile <- function(filename,intensity.means.only=FALSE){ return(.Call("R_read_cel_file",filename,intensity.means.only,PACKAGE="affyio")) } affyio/R/read.celfile.header.R0000644000175400017540000000312413556116171017153 0ustar00biocbuildbiocbuild### ### File: read.celfile.header.R ### ### Aim: read header contents of a given specified CEL file into ### an R data structure. ### read.celfile.header <- function(filename,info=c("basic","full"),verbose=FALSE){ compress <- FALSE info <- match.arg(info) if (info == "basic"){ if (verbose) cat("Reading", filename, "to get header information.\n") headdetails <- .Call("ReadHeader", filename, PACKAGE="affyio") names(headdetails) <- c("cdfName","CEL dimensions") names(headdetails$"CEL dimensions") <- c("Cols", "Rows") } else { if (verbose) cat("Reading", filename, "to get full header information.\n") ### full returns greater detailed information from the header. Exact details differ depending on the file format. headdetails <- try(.Call("ReadHeaderDetailed", filename, PACKAGE="affyio")) if (is(headdetails, "try-error")) stop("Failed to get full header information for ", filename) names(headdetails) <- c("cdfName","CEL dimensions","GridCornerUL","GridCornerUR","GridCornerLR","GridCornerLL","DatHeader","Algorithm","AlgorithmParameters","ScanDate") names(headdetails$"CEL dimensions") <- c("Cols", "Rows") if (nchar(headdetails$ScanDate) == 0){ # try to extract it from the DatHeader DatHeaderSplit <- strsplit(headdetails$DatHeader," ") Which.Date <- grep("[0-9]*/[0-9]*/[0-9]*",DatHeaderSplit[[1]]) Which.Time <- grep("[0-9]*:[0-9]*:[0-9]*",DatHeaderSplit[[1]]) headdetails$ScanDate <- paste(DatHeaderSplit[[1]][Which.Date],DatHeaderSplit[[1]][Which.Time]) } } return(headdetails) } affyio/R/read.probematrices.R0000644000175400017540000000175013556116171017163 0ustar00biocbuildbiocbuild### ### File: read.probematrices.R ### ### Aim: read in PM or MM or both probe intensities into a list of ### of matrices where the probes for 
particular probesets are ### contiguous in the returned matrix ### Each Matrix has probes in rows and arrays in columns ### ### Copyright (C) 2005 B. M. Bolstad ### ### History ### Nov 30, 2005 - Initial version ### read.celfile.probeintensity.matrices <- function(filenames, cdfInfo, rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE, verbose=FALSE, which= c("pm","mm","both")){ which <- match.arg(which) filenames <- as.character(filenames) if (verbose) cat("Reading", filenames[1], "to get header information.\n") headdetails <- .Call("ReadHeader", filenames[1], PACKAGE="affyio") dim.intensity <- headdetails[[2]] ref.cdfName <- headdetails[[1]] .Call("read_probeintensities", filenames, rm.mask, rm.outliers, rm.extra, ref.cdfName, dim.intensity, verbose, cdfInfo,which, PACKAGE="affyio") } affyio/R/read_abatch.R0000644000175400017540000000024613556116171015626 0ustar00biocbuildbiocbuild read_abatch <- function(...) .Call("read_abatch", ..., PACKAGE="affyio") read_abatch_stddev <- function(...) .Call("read_abatch_stddev", ..., PACKAGE="affyio") affyio/aclocal.m40000644000175400017540000000113413556116171014722 0ustar00biocbuildbiocbuild## ## Try finding zlib library and headers ## ## R_ZLIB() ## AC_DEFUN([R_ZLIB], [ have_zlib=no AC_CHECK_LIB(z, main, [ AC_CHECK_HEADER(zlib.h, [ AC_MSG_CHECKING([if zlib version >= 1.1.3]) AC_TRY_RUN([ #include "confdefs.h" #include #include int main() { #ifdef ZLIB_VERSION return(strcmp(ZLIB_VERSION, "1.1.3") < 0); #else return(1); #endif }], [AC_MSG_RESULT([yes]) have_zlib=yes], AC_MSG_RESULT([no]), AC_MSG_RESULT([no])) ]) ]) if test "${have_zlib}" = yes; then AC_DEFINE(HAVE_ZLIB) LIBS='-lz '$LIBS fi ]) affyio/configure0000755000175400017540000040543013556146041014777 0ustar00biocbuildbiocbuild#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.69. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. 
# # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. 
if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. 
if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then _as_can_reexec=no; export _as_can_reexec; # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 as_fn_exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi " as_required="as_fn_return () { (exit \$1); } as_fn_success () { as_fn_return 0; } as_fn_failure () { as_fn_return 1; } as_fn_ret_success () { return 0; } as_fn_ret_failure () { return 1; } exitcode=0 as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : else exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test \$(( 1 + 1 )) = 2 || exit 1" if (eval "$as_required") 2>/dev/null; then : as_have_required=yes else as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. as_shell=$as_dir/$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : CONFIG_SHELL=$as_shell as_have_required=yes if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : break 2 fi fi done;; esac as_found=false done $as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : CONFIG_SHELL=$SHELL as_have_required=yes fi; } IFS=$as_save_IFS if test "x$CONFIG_SHELL" != x; then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. 
BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi if test x$as_have_required = xno; then : $as_echo "$0: This script requires a shell more modern than all" $as_echo "$0: the shells that I found on your system." if test x${ZSH_VERSION+set} = xset ; then $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" $as_echo "$0: be upgraded to zsh 4.3.4 or later." else $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." fi exit 1 fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} export SHELL # Unset more variables known to interfere with behavior of common tools. CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS ## --------------------- ## ## M4sh Shell Functions. ## ## --------------------- ## # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. 
as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? 
-eq 1` } fi # as_fn_arith # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_lineno_1=$LINENO as_lineno_1a=$LINENO as_lineno_2=$LINENO as_lineno_2a=$LINENO eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall # in an infinite loop. This has already happened in practice. _as_can_reexec=no; export _as_can_reexec # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 
2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= # Identity of this package. PACKAGE_NAME= PACKAGE_TARNAME= PACKAGE_VERSION= PACKAGE_STRING= PACKAGE_BUGREPORT= PACKAGE_URL= ac_unique_file=""DESCRIPTION"" # Factoring default headers for most tests. ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_subst_vars='LTLIBOBJS LIBOBJS EGREP GREP CPP OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_URL PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking 
enable_threading ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CPP' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *=) ac_optarg= ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. 
case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? 
"invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | 
--mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | 
--program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | 
--targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. 
with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) as_fn_error $? "unrecognized option: \`$ac_option' Try \`$0 --help' for more information" ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. 
case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? 
"cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures this package to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking ...' 
messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/PACKAGE] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF _ACEOF fi if test -n "$ac_init_help"; then cat <<\_ACEOF Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as 
--enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --disable-threading Disable threading Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CPP C preprocessor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to the package provider. _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. 
if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF configure generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi ## ------------------------ ## ## Autoconf initialization. ## ## ------------------------ ## # ac_fn_c_try_compile LINENO # -------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile # ac_fn_c_try_link LINENO # ----------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. 
# Purpose: run the link command held in $ac_link on conftest.$ac_ext and
# report success or failure through as_fn_set_status.
#
# Arguments:
#   $1  line number of the caller; used as the default for as_lineno, which
#       prefixes the messages written to fd 5.
#
# Notes:
#   - fd 5 is the log: this script runs `exec 5>>config.log` before the
#     core tests start.
#   - as_fn_set_status is defined outside this part of the script;
#     presumably it makes the function return $ac_retval -- confirm there.
#   - as_lineno_stack saves the previous as_lineno_stack value as an
#     assignment string; `eval $as_lineno_stack` restores it on exit, and
#     as_lineno is unset only when the restored stack is empty (that is,
#     when this was the outermost ac_fn_* call).
ac_fn_c_try_link ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  # Remove leftovers of a previous test so that the `test -s` below only
  # sees an executable produced by this link.
  rm -f conftest.$ac_objext conftest$ac_exeext
  # First brace group: echo the command to the log, run it with stderr
  # captured in conftest.err, and succeed only if it exited with status 0.
  # When the command text contains a double quote, backquote or backslash,
  # ac_try_echo is set to the literal string `$ac_try`, so the eval below
  # expands the variable instead of re-parsing those characters.
  if { { ac_try="$ac_link"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
  (eval "$ac_link") 2>conftest.err
  ac_status=$?
  if test -s conftest.err; then
    # Drop lines that start with optional blanks and a `+` (presumably
    # shell trace output) before copying the diagnostics to the log.
    grep -v '^ *+' conftest.err >conftest.er1
    cat conftest.er1 >&5
    mv -f conftest.er1 conftest.err
  fi
  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
  test $ac_status = 0; } && {
         test -z "$ac_c_werror_flag" ||
         test ! -s conftest.err
       } && test -s conftest$ac_exeext && {
         test "$cross_compiling" = yes ||
         test -x conftest$ac_exeext
       }; then :
  ac_retval=0
else
  # On failure, copy the test program into the log with a `| ` prefix.
  $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

  ac_retval=1
fi
  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
  # interfere with the next link command; also delete a directory that is
  # left behind by Apple's compiler. We do this before executing the actions.
  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  as_fn_set_status $ac_retval

} # ac_fn_c_try_link

# ac_fn_c_try_cpp LINENO
# ----------------------
# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
#
# The command run is "$ac_cpp conftest.$ac_ext". The brace group's stdout
# is redirected to conftest.i and its stderr to conftest.err. The attempt
# counts as successful when the command exits with status 0 and either
# both $ac_c_preproc_warn_flag and $ac_c_werror_flag are empty or
# conftest.err is empty. ac_retval is 0 on success and 1 on failure.
ac_fn_c_try_cpp ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  if { { ac_try="$ac_cpp conftest.$ac_ext"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
  ac_status=$?
  if test -s conftest.err; then
    # Same filtering as in ac_fn_c_try_link: lines beginning with blanks
    # and `+` are removed before the diagnostics go to the log.
    grep -v '^ *+' conftest.err >conftest.er1
    cat conftest.er1 >&5
    mv -f conftest.er1 conftest.err
  fi
  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
  test $ac_status = 0; } > conftest.i && {
         test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
         test ! -s conftest.err
       }; then :
  ac_retval=0
else
  $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

    ac_retval=1
fi
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  as_fn_set_status $ac_retval

} # ac_fn_c_try_cpp

# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
# -------------------------------------------------------
# Tests whether HEADER exists, giving a warning if it cannot be compiled using
# the include files in INCLUDES and setting the cache variable VAR
# accordingly.
#
# Arguments as used below:
#   $1  caller's line number (default for as_lineno)
#   $2  header name, inserted as `#include <$2>` in the test programs
#   $3  name of the cache variable that is read and set through eval
#   $4  text placed before the #include in the compile test
#
# Flow: if the variable named by $3 is already set, only its value is
# reported. Otherwise the header is compiled (ac_fn_c_try_compile) and
# preprocessed (ac_fn_c_try_cpp) separately, warnings are issued when the
# two verdicts disagree, and the variable named by $3 is set to the
# compiler's verdict.
# Progress messages go to fd 6 (this script redirects fd 6 to /dev/null
# when $silent is yes); detailed records go to fd 5.
ac_fn_c_check_header_mongrel ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  # `eval \${$3+:} false` runs `: false` (success) when the variable named
  # by $3 is set and plain `false` when it is not.
  if eval \${$3+:} false; then :
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
fi
eval ac_res=\$$3
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
else
  # Is the header compilable?
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
$as_echo_n "checking $2 usability... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$4
#include <$2>
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  ac_header_compiler=yes
else
  ac_header_compiler=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
$as_echo "$ac_header_compiler" >&6; }

# Is the header present?
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
$as_echo_n "checking $2 presence... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <$2>
_ACEOF
if ac_fn_c_try_cpp "$LINENO"; then :
  ac_header_preproc=yes
else
  ac_header_preproc=no
fi
rm -f conftest.err conftest.i conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
$as_echo "$ac_header_preproc" >&6; }

# So? What about this header?
# The `yes:no:` pattern only matches when $ac_c_preproc_warn_flag is empty;
# `no:yes:*` matches whatever that flag holds. Agreeing verdicts match no
# pattern and produce no warning.
case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
  yes:no: )
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
    ;;
  no:yes:* )
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
    ;;
esac
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  # The compiler's verdict is what gets stored in the cache variable.
  eval "$3=\$ac_header_compiler"
fi
eval ac_res=\$$3
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
fi
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_check_header_mongrel

# ac_fn_c_try_run LINENO
# ----------------------
# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
# that executables *can* be run.
#
# In addition to linking, the function executes ./conftest$ac_exeext and
# requires it to exit with status 0. Unlike ac_fn_c_try_link, stderr of
# both steps goes straight to fd 5 and no conftest.err is produced. On
# failure ac_retval is the last recorded $ac_status (from the link step if
# linking failed, otherwise from the program run) rather than a fixed 1.
ac_fn_c_try_run ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  if { { ac_try="$ac_link"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
  (eval "$ac_link") 2>&5
  ac_status=$?
  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
  { { case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
  (eval "$ac_try") 2>&5
  ac_status=$?
  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
  test $ac_status = 0; }; }; then :
  ac_retval=0
else
  $as_echo "$as_me: program exited with status $ac_status" >&5
       $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

       ac_retval=$ac_status
fi
  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  as_fn_set_status $ac_retval

} # ac_fn_c_try_run

# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
# -------------------------------------------------------
# Tests whether HEADER exists and can be compiled using the include files in
# INCLUDES, setting the cache variable VAR accordingly.
# Arguments as used below:
#   $1  caller's line number (default for as_lineno)
#   $2  header name, inserted as `#include <$2>` in the test program
#   $3  name of the cache variable; set to "yes" or "no" through eval
#       unless it is already set, in which case "(cached)" is printed and
#       no compilation takes place
#   $4  text placed before the #include in the test program
# The function ends with the as_lineno bookkeeping and does not call
# as_fn_set_status; callers read the verdict from the variable named by $3.
ac_fn_c_check_header_compile ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
# `eval \${$3+:} false` succeeds only when the variable named by $3 is set.
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$4
#include <$2>
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  eval "$3=yes"
else
  eval "$3=no"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
eval ac_res=\$$3
               { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_check_header_compile

# Start a fresh config.log: `>` truncates or creates the file, and the
# unquoted heredoc delimiter lets $as_me, $0 and $@ expand so the log
# records how configure was invoked.
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by $as_me, which was
generated by GNU Autoconf 2.69. Invocation command line was

  $ $0 $@

_ACEOF
# From here on fd 5 appends to config.log; the ac_fn_* helpers write their
# diagnostics to this descriptor.
exec 5>>config.log
{
# Platform report. Each backquoted command runs while the heredoc is
# expanded; probes other than the hostname line fall back to the word
# "unknown" when the command fails.
cat <<_ASUNAME
## --------- ##
## Platform. ##
## --------- ##

hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
uname -m = `(uname -m) 2>/dev/null || echo unknown`
uname -r = `(uname -r) 2>/dev/null || echo unknown`
uname -s = `(uname -s) 2>/dev/null || echo unknown`
uname -v = `(uname -v) 2>/dev/null || echo unknown`

/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`

/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown`
/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`

_ASUNAME

# Log every PATH entry on its own line. IFS is switched to
# $PATH_SEPARATOR only for the word splitting of $PATH and restored
# inside the loop and again after it; an empty entry is reported as ".".
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
    $as_echo "PATH: $as_dir"
  done
IFS=$as_save_IFS

} >&5

# Section banner that separates the platform report from the test output
# in config.log.
cat >&5 <<_ACEOF


## ----------- ##
## Core tests. ##
## ----------- ##

_ACEOF


# Keep a trace of the command line.
# Strip out --no-create and --no-recursion so they do not pile up.
# Strip out --silent because we don't want to record it for future runs.
# Also quote any args containing shell meta-characters.
# Make two passes to allow for proper duplicate-argument suppression.
ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; 2) as_fn_append ac_configure_args1 " '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi as_fn_append ac_configure_args " '$ac_arg'" ;; esac done done { ac_configure_args0=; unset ac_configure_args0;} { ac_configure_args1=; unset ac_configure_args1;} # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo $as_echo "## ---------------- ## ## Cache variables. 
## ## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo $as_echo "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then $as_echo "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then $as_echo "## ----------- ## ## confdefs.h. 
## ## ----------- ##" echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h $as_echo "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_URL "$PACKAGE_URL" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then # We do not want a PATH search for config.site. case $CONFIG_SITE in #(( -*) ac_site_file1=./$CONFIG_SITE;; */*) ac_site_file1=$CONFIG_SITE;; *) ac_site_file1=./$CONFIG_SITE;; esac elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . 
"$ac_site_file" \ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. 
ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) as_fn_append ac_configure_args " '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. 
## ## -------------------- ## ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. 
shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 $as_echo_n "checking whether the C compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { { ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. 
break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi if test -z "$ac_file"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 $as_echo_n "checking for C compiler default output file name... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if ${ac_cv_objext+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if ${ac_cv_c_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if ${ac_cv_prog_cc_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes else CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if ${ac_cv_prog_cc_c89+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include struct stat; /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. 
The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac if test "x$ac_cv_prog_cc_c89" != xno; then : fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo 
"$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 $as_echo_n "checking how to run the C preprocessor... " >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if ${ac_cv_prog_CPP+:} false; then : $as_echo_n "(cached) " >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 $as_echo "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. 
# On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 $as_echo_n "checking for grep that handles long lines and -e... " >&6; } if ${ac_cv_path_GREP+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$GREP"; then ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_prog in grep ggrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_GREP" || continue # Check for GNU ac_path_GREP and select it if it is found. # Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_GREP"; then as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_GREP=$GREP fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 $as_echo "$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 $as_echo_n "checking for egrep... " >&6; } if ${ac_cv_path_EGREP+:} false; then : $as_echo_n "(cached) " >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else if test -z "$EGREP"; then ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_prog in egrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_EGREP" || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_EGREP"; then as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_EGREP=$EGREP fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 $as_echo "$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include #include #include #include int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdc=yes else ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : : else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : else ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default " if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done have_zlib=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking for main in -lz" >&5 $as_echo_n "checking for main in -lz... " >&6; } if ${ac_cv_lib_z_main+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lz $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { return main (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_z_main=yes else ac_cv_lib_z_main=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_main" >&5 $as_echo "$ac_cv_lib_z_main" >&6; } if test "x$ac_cv_lib_z_main" = xyes; then : ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default" if test "x$ac_cv_header_zlib_h" = xyes; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking if zlib version >= 1.1.3" >&5 $as_echo_n "checking if zlib version >= 1.1.3... " >&6; } if test "$cross_compiling" = yes; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include "confdefs.h" #include #include int main() { #ifdef ZLIB_VERSION return(strcmp(ZLIB_VERSION, "1.1.3") < 0); #else return(1); #endif } _ACEOF if ac_fn_c_try_run "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } have_zlib=yes else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi if test "${have_zlib}" = yes; then $as_echo "#define HAVE_ZLIB 1" >>confdefs.h LIBS='-lz '$LIBS fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 $as_echo_n "checking for pthread_create in -lpthread... 
" >&6; } if ${ac_cv_lib_pthread_pthread_create+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lpthread $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char pthread_create (); int main () { return pthread_create (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_pthread_pthread_create=yes else ac_cv_lib_pthread_pthread_create=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 $as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBPTHREAD 1 _ACEOF LIBS="-lpthread $LIBS" fi cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char pthread_create (); int main () { return pthread_create (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : use_pthreads=yes else use_pthreads=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can use pthreads" >&5 $as_echo_n "checking if we can use pthreads... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $use_pthreads" >&5 $as_echo "$use_pthreads" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking if PTHREAD_STACK_MIN is defined" >&5 $as_echo_n "checking if PTHREAD_STACK_MIN is defined... 
" >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include int main () {size_t stacksize = PTHREAD_STACK_MIN + 0x4000; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : use_pthread_stack_min=yes else use_pthread_stack_min=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $use_pthread_stack_min" >&5 $as_echo "$use_pthread_stack_min" >&6; } if test "x$use_pthread_stack_min" = xno; then use_pthreads=no fi # Check whether --enable-threading was given. if test "${enable_threading+set}" = set; then : enableval=$enable_threading; fi if test "x$enable_threading" != "xno" ; then : if test "x${use_pthreads}" = "xno"; then { $as_echo "$as_me:${as_lineno-$LINENO}: ------------------------------------------" >&5 $as_echo "$as_me: ------------------------------------------" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Unable to find pthreads on this system. " >&5 $as_echo "$as_me: Unable to find pthreads on this system. " >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Building a single-threaded version. " >&5 $as_echo "$as_me: Building a single-threaded version. " >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: ------------------------------------------" >&5 $as_echo "$as_me: ------------------------------------------" >&6;} fi if test "x${use_pthreads}" = "xyes"; then { $as_echo "$as_me:${as_lineno-$LINENO}: Enabling threading for affyio" >&5 $as_echo "$as_me: Enabling threading for affyio" >&6;} $as_echo "#define USE_PTHREADS 1" >>confdefs.h fi else { $as_echo "$as_me:${as_lineno-$LINENO}: Disabling threading for affyio" >&5 $as_echo "$as_me: Disabling threading for affyio" >&6;} fi ac_config_files="$ac_config_files src/Makevars" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. 
# It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. ( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. 
sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else case $cache_file in #( */* | ?:*) mv -f confcache "$cache_file"$$ && mv -f "$cache_file"$$ "$cache_file" ;; #( *) mv -f confcache "$cache_file" ;; esac fi fi else { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. # # If the first sed substitution is executed (which looks for macros that # take arguments), then branch to the quote section. Otherwise, # look for a macro that doesn't take arguments. ac_script=' :mline /\\$/{ N s,\\\n,, b mline } t clear :clear s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g t quote s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g t quote b any :quote s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g s/\[/\\&/g s/\]/\\&/g s/\$/$$/g H :any ${ g s/^\n// s/\n/ /g p } ' DEFS=`sed -n "$ac_script" confdefs.h` ac_libobjs= ac_ltlibobjs= U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. 
Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. 
if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. 
in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... 
# ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. 
ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 ## ----------------------------------- ## ## Main body of $CONFIG_STATUS script. 
## ## ----------------------------------- ## _ASEOF test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Save the log message, to keep $0 and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by $as_me, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE Configuration files: $config_files Report bugs to the package provider." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ config.status configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" Copyright (C) 2012 Free Software Foundation, Inc. 
This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; --*=) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg= ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --he | --h | --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." 
;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. for ac_config_target in $ac_config_targets do case $ac_config_target in "src/Makevars") CONFIG_FILES="$CONFIG_FILES src/Makevars" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= trap 'exit_status=$? : "${ac_tmp:=$tmp}" { test ! 
-d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=`echo X | tr X '\015'` # On cygwin, bash can eat \r inside `` if the user requested igncr. # But we know of no other shell where ac_cr would be empty at this # point, so we can use a bashism as a fallback. if test "x$ac_cr" = x; then eval ac_cr=\$\'\\r\' fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 _ACEOF # VPATH may cause trouble with some makes, so we remove sole $(srcdir), # ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). 
if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ h s/// s/^/:/ s/[ ]*$/:/ s/:\$(srcdir):/:/g s/:\${srcdir}:/:/g s/:@srcdir@:/:/g s/^:*// s/:*$// x s/\(=[ ]*\).*/\1/ G s/\n// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" eval set X " :F $CONFIG_FILES " shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. 
case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$ac_tmp/stdin" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir="$ac_dir"; as_fn_mkdir_p ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. 
ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. 
Please make sure it is defined" >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" case $ac_file in -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac done # for ac_tag as_fn_exit 0 _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi affyio/configure.in0000755000175400017540000000265613556116171015410 0ustar00biocbuildbiocbuilddnl dnl Configuration things for affyio dnl AC_INIT("DESCRIPTION") dnl dnl Are things (still) the same ? 
dnl (taken from the 'writing R extensions manual')

dnl NOTE(review): the line structure of this file was lost when the archive
dnl was flattened.  R_ZLIB is placed on its own line because the generated
dnl src/Makevars carries -DHAVE_ZLIB=1 and -lz, so it must not be part of
dnl the dnl comment above.  Confirm against the upstream file.
R_ZLIB

AC_LANG(C)

AC_CHECK_LIB(pthread, pthread_create)

AC_TRY_LINK_FUNC(pthread_create, [use_pthreads=yes], [use_pthreads=no])

AC_MSG_CHECKING([if we can use pthreads])
AC_MSG_RESULT($use_pthreads)

dnl Threading is only enabled when PTHREAD_STACK_MIN is available.
dnl NOTE(review): the two header names in the probe below were stripped
dnl during extraction; <pthread.h> and <limits.h> are reconstructed
dnl (POSIX defines PTHREAD_STACK_MIN in <limits.h>).  Confirm upstream.
AC_MSG_CHECKING([if PTHREAD_STACK_MIN is defined])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
	#include <pthread.h>
	#include <limits.h>

	int main () {size_t stacksize = PTHREAD_STACK_MIN + 0x4000; }
])],[use_pthread_stack_min=yes], [use_pthread_stack_min=no])
AC_MSG_RESULT($use_pthread_stack_min)

if test "x$use_pthread_stack_min" = xno; then
	use_pthreads=no
fi

AC_ARG_ENABLE([threading],
	AS_HELP_STRING([--disable-threading],[Disable threading]))

dnl USE_PTHREADS is defined only when threading was not disabled on the
dnl command line and the checks above found a usable pthreads.
AS_IF([test "x$enable_threading" != "xno" ],[
	if test "x${use_pthreads}" = "xno"; then
		AC_MSG_NOTICE(------------------------------------------)
		AC_MSG_NOTICE( Unable to find pthreads on this system. )
		AC_MSG_NOTICE( Building a single-threaded version. )
		AC_MSG_NOTICE(------------------------------------------)
	fi

	if test "x${use_pthreads}" = "xyes"; then
		AC_MSG_NOTICE(Enabling threading for affyio)
		AC_DEFINE(USE_PTHREADS, 1)
	fi
	],
	[
	AC_MSG_NOTICE(Disabling threading for affyio)
	])

AC_OUTPUT(src/Makevars)
affyio/man/0000755000175400017540000000000013556116171013636 5ustar00biocbuildbiocbuildaffyio/man/check.cdf.type.Rd0000644000175400017540000000114413556116171016715 0ustar00biocbuildbiocbuild\name{check.cdf.type} \alias{check.cdf.type} \title{CDF file format function} \description{This function returns a text string giving the file format for the supplied filename } \usage{check.cdf.type(filename) } \arguments{ \item{filename}{fullpath to a cdf file} } \value{Returns a string which is currently one of: \item{text}{the cdf file is of the text format} \item{xda}{the cdf file is of the binary format used in GCOS} \item{unknown}{the parser can not handle this format or does not recognize this file as a CDF file} } \author{B. M.
Bolstad } \keyword{IO} affyio/man/get.celfile.dates.Rd0000644000175400017540000000161613556116171017411 0ustar00biocbuildbiocbuild\name{get.celfile.dates} \alias{get.celfile.dates} \title{Extract Dates from CEL files} \description{This function reads the header information for a series of CEL files, then extracts and returns the dates.} \usage{ get.celfile.dates(filenames, ...) } \arguments{ \item{filenames}{a character vector of CEL filenames. May be fully pathed.} \item{\dots}{further arguments. These are currently accepted but not passed on to \code{\link{read.celfile.header}}.} } \details{ The function uses \code{\link{read.celfile.header}} to read in the header of each file. The \code{ScanDate} component is then parsed to extract the date. Note that an assumption is made about the format, namely that dates are in the Y-m-d or m/d/y format.} \value{ A vector of class \code{\link{Date}} with one date for each CEL file.} \author{ Rafael A. Irizarry } \seealso{ See also \code{\link{read.celfile.header}}. } \keyword{IO} affyio/man/read.cdffile.list.Rd0000644000175400017540000000115013556116171017402 0ustar00biocbuildbiocbuild\name{read.cdffile.list} \alias{read.cdffile.list} \title{Read CDF file into an R list} \description{This function reads the entire contents of a cdf file into an R list structure } \usage{read.cdffile.list(filename, cdf.path = getwd()) } \arguments{ \item{filename}{name of CDF file} \item{cdf.path}{path to cdf file} } \value{returns a \code{list} structure. The exact contents may vary depending on the file format of the cdf file (see \code{\link{check.cdf.type}}) } \details{ Note that this function can be very memory intensive with large CDF files. } \author{B. M.
Bolstad } \keyword{IO} affyio/man/read.celfile.Rd0000644000175400017540000000136613556116171016450 0ustar00biocbuildbiocbuild\name{read.celfile} \alias{read.celfile} \title{Read a CEL file into an R list} \description{This function reads the entire contents of a CEL file into an R list structure } \usage{read.celfile(filename,intensity.means.only=FALSE) } \arguments{ \item{filename}{name of CEL file} \item{intensity.means.only}{If \code{TRUE} then read in only the MEAN section in INTENSITY} } \value{returns a \code{list} structure. The exact contents may vary depending on the file format of the CEL file } \details{ The list has four main items: HEADER, INTENSITY, MASKS and OUTLIERS. Note that INTENSITY is a list of three vectors: MEAN, STDEV and NPIXELS. HEADER is also a list. Both MASKS and OUTLIERS are matrices. } \author{B. M. Bolstad } \keyword{IO} affyio/man/read.celfile.header.Rd0000644000175400017540000000146413556116171017676 0ustar00biocbuildbiocbuild\name{read.celfile.header} \alias{read.celfile.header} \title{Read header information from cel file} \description{ This function reads some of the header information (which appears before the probe intensity data) from the supplied cel file. } \usage{read.celfile.header(filename,info=c("basic","full"),verbose=FALSE) } \arguments{ \item{filename}{name of CEL file. May be fully pathed} \item{info}{A string. \code{basic} returns the dimensions of the chip and the name of the CDF file used when the CEL file was produced. \code{full} returns more information in greater detail.} \item{verbose}{a \code{\link{logical}}. When \code{TRUE} the parsing routine prints more information, typically useful for debugging.} } \value{ A \code{list} data structure. } \author{B. M.
Bolstad } \keyword{IO} affyio/man/read.celfile.probeintensity.matrices.Rd0000644000175400017540000000272513556116171023333 0ustar00biocbuildbiocbuild\name{read.celfile.probeintensity.matrices} \alias{read.celfile.probeintensity.matrices} \title{Read PM or MM from CEL file into matrices} \description{This function reads PM, MM or both types of intensities into matrices. These matrices have all the probes for a probeset in adjacent rows } \usage{read.celfile.probeintensity.matrices(filenames, cdfInfo, rm.mask=FALSE, rm.outliers=FALSE, rm.extra=FALSE, verbose=FALSE, which= c("pm","mm","both")) } \arguments{ \item{filenames}{a character vector of filenames} \item{cdfInfo}{a list with items giving PM and MM locations for desired probesets. In the same structure as returned by \code{\link[makecdfenv]{make.cdf.package}}} \item{rm.mask}{a \code{\link{logical}}. Return these probes as NA if they are in the [MASKS] section of the CEL file.} \item{rm.outliers}{a \code{\link{logical}}. Return these probes as NA if they are in the [OUTLIERS] section of the CEL file.} \item{rm.extra}{a \code{\link{logical}}. Return these probes as NA if they are in the [OUTLIERS] section of the CEL file.} \item{verbose}{a \code{\link{logical}}. When \code{TRUE} the parsing routine prints more information, typically useful for debugging.} \item{which}{a string specifying which probe type to return} } \value{returns a \code{\link{list}} of \code{\link{matrix}} items. One matrix contains PM probe intensities, with probes in rows and arrays in columns } \author{B. M. Bolstad } \keyword{IO} affyio/man/read_abatch.Rd0000644000175400017540000000040213556116171016336 0ustar00biocbuildbiocbuild\name{internal functions} \alias{read_abatch} \alias{read_abatch_stddev} \title{Internal affyio functions} \description{Internal affyio functions} \details{These are not to be called directly by a user.
They support the affy package} \keyword{internal} affyio/src/0000755000175400017540000000000013556146041013651 5ustar00biocbuildbiocbuildaffyio/src/Makevars0000644000175400017540000000066113556134576015362 0ustar00biocbuildbiocbuildPKG_CFLAGS = -g -O2 PKG_LIBS = -lpthread -lz PKG_CPPFLAGS = -DPACKAGE_NAME=\"\" -DPACKAGE_TARNAME=\"\" -DPACKAGE_VERSION=\"\" -DPACKAGE_STRING=\"\" -DPACKAGE_BUGREPORT=\"\" -DPACKAGE_URL=\"\" -DSTDC_HEADERS=1 -DHAVE_SYS_TYPES_H=1 -DHAVE_SYS_STAT_H=1 -DHAVE_STDLIB_H=1 -DHAVE_STRING_H=1 -DHAVE_MEMORY_H=1 -DHAVE_STRINGS_H=1 -DHAVE_INTTYPES_H=1 -DHAVE_STDINT_H=1 -DHAVE_UNISTD_H=1 -DHAVE_ZLIB=1 -DHAVE_LIBPTHREAD=1 -DUSE_PTHREADS=1 affyio/src/Makevars.in0000644000175400017540000000010013556116171015742 0ustar00biocbuildbiocbuildPKG_CFLAGS = @CFLAGS@ PKG_LIBS = @LIBS@ PKG_CPPFLAGS = @DEFS@ affyio/src/Makevars.win0000644000175400017540000000047213556116171016145 0ustar00biocbuildbiocbuildPKG_CPPFLAGS += -DHAVE_ZLIB ZLIB_CFLAGS+=$(shell echo 'zlibbioc::pkgconfig("PKG_CFLAGS")'|\ "${R_HOME}/bin/R" --vanilla --slave) PKG_LIBS+=$(shell echo 'zlibbioc::pkgconfig("PKG_LIBS_shared")' |\ "${R_HOME}/bin/R" --vanilla --slave) %.o: %.c $(CC) $(ZLIB_CFLAGS) $(ALL_CPPFLAGS) $(ALL_CFLAGS) -c $< -o $@ affyio/src/fread_functions.c0000644000175400017540000004041413556116171017172 0ustar00biocbuildbiocbuild #include #include #include #include #include "stdlib.h" #include "stdio.h" #include "fread_functions.h" #define HAVE_ZLIB 1 #if defined(HAVE_ZLIB) #include #endif /************************************************************************* ** ** Code for reading from the binary files, doing bit flipping if ** necessary (on big-endian machines) ** ** ************************************************************************/ size_t fread_int32(int *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(int),n,instream); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ 
*destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t fread_uint32(unsigned int *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(unsigned int),n,instream); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t fread_int16(short *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(short),n,instream); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } size_t fread_uint16(unsigned short *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(unsigned short),n,instream); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } static void swap_float_4(float *tnf4) /* 4 byte floating point numbers */ { unsigned char *cptr,tmp; cptr = (unsigned char *)tnf4; tmp = cptr[0]; cptr[0] = cptr[3]; cptr[3] =tmp; tmp = cptr[1]; cptr[1] = cptr[2]; cptr[2] = tmp; } size_t fread_float32(float *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(float),n,instream); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ swap_float_4(destination); destination++; } #endif return result; } size_t fread_char(char *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(char),n,instream); #ifdef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ #endif 
return result; } size_t fread_uchar(unsigned char *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(unsigned char),n,instream); #ifdef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ /* destination = ~destination; */ #endif return result; } static void swap_float_8(double *tnf8) /* 8 byte floating point numbers */ { unsigned char *cptr,tmp; cptr = (unsigned char *)tnf8; tmp = cptr[0]; cptr[0] = cptr[7]; cptr[7] = tmp; tmp = cptr[1]; cptr[1] = cptr[6]; cptr[6] = tmp; tmp = cptr[2]; cptr[2] = cptr[5]; cptr[5] =tmp; tmp = cptr[3]; cptr[3] = cptr[4]; cptr[4] = tmp; } size_t fread_double64(double *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(double),n,instream); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ swap_float_8(destination); destination++; } #endif return result; } /************************************************************************* ** ** Code for big endian data reading from the binary files, doing bit flipping if ** necessary (on little-endian machines) ** ** ************************************************************************/ size_t fread_be_int32(int *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(int),n,instream); #ifndef WORDS_BIGENDIAN while (n-- > 0){ /* bit flip since on a little endian machine */ *destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t fread_be_uint32(unsigned int *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(unsigned int),n,instream); #ifndef WORDS_BIGENDIAN while (n-- > 0){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t 
fread_be_int16(short *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(short),n,instream); #ifndef WORDS_BIGENDIAN while (n-- > 0){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } size_t fread_be_uint16(unsigned short *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(unsigned short),n,instream); #ifndef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } /* static void swap_uint_32(unsigned int *tni4) // 4 byte integer numbers { *tni4=(((*tni4>>24)&0xff) | ((*tni4&0xff)<<24) | ((*tni4>>8)&0xff00) | ((*tni4&0xff00)<<8)); } */ size_t fread_be_float32(float *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(float),n,instream); #ifndef WORDS_BIGENDIAN while( n-- > 0 ) { swap_float_4(destination); destination++; } #endif return result; } size_t fread_be_char(char *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(char),n,instream); #ifndef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ #endif return result; } size_t fread_be_uchar(unsigned char *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(unsigned char),n,instream); #ifndef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ /* destination = ~destination; */ #endif return result; } size_t fread_be_wchar(wchar_t *destination, int n, FILE *instream){ size_t result; result = fread(destination, sizeof(wchar_t), n, instream); return result; } size_t fread_be_double64(double *destination, int n, FILE *instream){ size_t result; result = fread(destination,sizeof(double),n,instream); #ifndef WORDS_BIGENDIAN while( n-- > 
0 ){ swap_float_8(destination); destination++; } #endif return result; } #if defined(HAVE_ZLIB) /************************************************************************* ** ** Code for reading from the gzipped binary files, doing bit flipping if ** necessary (on big-endian machines) ** ** ************************************************************************/ size_t gzread_int32(int *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(int)*n); //int gzread (gzFile file, voidp buf, unsigned int len); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t gzread_uint32(unsigned int *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(unsigned int)*n); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t gzread_int16(short *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(short)*n); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } size_t gzread_uint16(unsigned short *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(unsigned short)*n); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } size_t 
gzread_float32(float *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(float)*n); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ swap_float_4(destination); destination++; } #endif return result; } size_t gzread_char(char *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(char)*n); #ifdef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ #endif return result; } size_t gzread_uchar(unsigned char *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(unsigned char)*n); #ifdef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ #endif return result; } size_t gzread_double64(double *destination, int n, gzFile instream){ size_t result; result = gzread(instream, destination,sizeof(double)*n); #ifdef WORDS_BIGENDIAN while( n-- > 0 ){ swap_float_8(destination); destination++; } #endif return result; } /************************************************************************* ** ** Code for reading from the gzipped binary files written in BE format, doing bit flipping if ** necessary (on big-endian machines) ** ** ************************************************************************/ size_t gzread_be_int32(int *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(int)*n); //int gzread (gzFile file, voidp buf, unsigned int len); #ifndef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t gzread_be_uint32(unsigned int *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(unsigned int)*n); #ifndef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ 
*destination=(((*destination>>24)&0xff) | ((*destination&0xff)<<24) | ((*destination>>8)&0xff00) | ((*destination&0xff00)<<8)); destination++; } #endif return result; } size_t gzread_be_int16(short *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(short)*n); #ifndef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } size_t gzread_be_uint16(unsigned short *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(unsigned short)*n); #ifndef WORDS_BIGENDIAN while( n-- > 0 ){ /* bit flip since all Affymetrix binary files are little endian */ *destination=(((*destination>>8)&0xff) | ((*destination&0xff)<<8)); destination++; } #endif return result; } size_t gzread_be_float32(float *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(float)*n); #ifndef WORDS_BIGENDIAN while( n-- > 0 ){ swap_float_4(destination); destination++; } #endif return result; } size_t gzread_be_char(char *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(char)*n); #ifndef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ #endif return result; } size_t gzread_be_uchar(unsigned char *destination, int n, gzFile instream){ size_t result; result = gzread(instream,destination,sizeof(unsigned char)*n); #ifndef WORDS_BIGENDIAN /* Probably don't need to do anything for characters */ #endif return result; } size_t gzread_be_double64(double *destination, int n, gzFile instream){ size_t result; result = gzread(instream, destination,sizeof(double)*n); #ifndef WORDS_BIGENDIAN while( n-- > 0 ){ swap_float_8(destination); destination++; } #endif return result; } #endif void test_parsers_le(){ FILE* infile; int i; unsigned char my_uc; char my_c; unsigned short 
my_us; short my_s; unsigned int my_ui; int my_i; float my_f; double my_d; if ((infile = fopen("LittleEndianTest.bin", "rb")) == NULL) { Rprintf("Unable to open the file\n"); return ; } for (i = 0; i < 255; i++){ fread_uchar(&my_uc,1, infile); Rprintf("Was : %d should be %d\n",my_uc,i); } for (i = -128; i < 127; i++){ fread_char(&my_c,1, infile); Rprintf("Was : %d should be %d\n",my_c,i); } for (i =0; i < 15; i++){ fread_uint16(&my_us,1,infile); Rprintf("Was : %d \n", my_us); } for (i=0; i < 15; i++){ fread_int16(&my_s,1,infile); Rprintf("Was : %d \n", my_s); } for (i=0; i < 31; i++){ fread_uint32(&my_ui,1,infile); Rprintf("uint32 Was : %d \n", my_ui); } for (i=0; i < 31; i++){ fread_int32(&my_i,1, infile); Rprintf("int32 Was : %d \n", my_i); } for (i = 0; i < 25; i++){ fread_float32(&my_f,1,infile); Rprintf("float32 Was : %e \n", my_f); } fread_float32(&my_f,1,infile); Rprintf("PI float32 Was : %f \n", my_f); for (i = 0; i < 25; i++){ fread_double64(&my_d,1,infile); Rprintf("double64 Was : %le \n", my_d); } fread_double64(&my_d,1,infile); Rprintf("exp(1) double64 Was : %f \n", my_d); } void test_parsers_be(){ FILE* infile; int i; unsigned char my_uc; char my_c; unsigned short my_us; short my_s; unsigned int my_ui; int my_i; float my_f; double my_d; if ((infile = fopen("BigEndianTest.bin", "rb")) == NULL) { Rprintf("Unable to open the file\n"); return ; } for (i = 0; i < 255; i++){ fread_be_uchar(&my_uc,1, infile); Rprintf("Was : %d should be %d\n",my_uc,i); } for (i = -128; i < 127; i++){ fread_be_char(&my_c,1, infile); Rprintf("Was : %d should be %d\n",my_c,i); } for (i =0; i < 15; i++){ fread_be_uint16(&my_us,1,infile); Rprintf("Was : %d \n", my_us); } for (i=0; i < 15; i++){ fread_be_int16(&my_s,1,infile); Rprintf("Was : %d \n", my_s); } for (i=0; i < 31; i++){ fread_be_uint32(&my_ui,1,infile); Rprintf("uint32 Was : %d \n", my_ui); } for (i=0; i < 31; i++){ fread_be_int32(&my_i,1, infile); Rprintf("int32 Was : %d \n", my_i); } for (i = 0; i < 25; i++){ 
fread_be_float32(&my_f,1,infile); Rprintf("float32 Was : %e \n", my_f); } fread_be_float32(&my_f,1,infile); Rprintf("PI float32 Was : %f \n", my_f); for (i = 0; i < 25; i++){ fread_be_double64(&my_d,1,infile); Rprintf("double64 Was : %le \n", my_d); } fread_be_double64(&my_d,1,infile); Rprintf("exp(1) double64 Was : %f \n", my_d); } affyio/src/fread_functions.h0000644000175400017540000000470413556116171017201 0ustar00biocbuildbiocbuild#ifndef _FREAD_FUNCTIONS_HEADER #define _FREAD_FUNCTIONS_HEADER #include "stdlib.h" #include "stdio.h" #define HAVE_ZLIB 1 #if defined(HAVE_ZLIB) #include #endif size_t fread_int32(int *destination, int n, FILE *instream); size_t fread_uint32(unsigned int *destination, int n, FILE *instream); size_t fread_int16(short *destination, int n, FILE *instream); size_t fread_uint16(unsigned short *destination, int n, FILE *instream); size_t fread_float32(float *destination, int n, FILE *instream); size_t fread_char(char *destination, int n, FILE *instream); size_t fread_uchar(unsigned char *destination, int n, FILE *instream); size_t fread_double64(double *destination, int n, FILE *instream); size_t fread_be_int32(int *destination, int n, FILE *instream); size_t fread_be_uint32(unsigned int *destination, int n, FILE *instream); size_t fread_be_int16(short *destination, int n, FILE *instream); size_t fread_be_uint16(unsigned short *destination, int n, FILE *instream); size_t fread_be_float32(float *destination, int n, FILE *instream); size_t fread_be_char(char *destination, int n, FILE *instream); size_t fread_be_uchar(unsigned char *destination, int n, FILE *instream); size_t fread_be_double64(double *destination, int n, FILE *instream); size_t fread_be_wchar(wchar_t *destination, int n, FILE *instream); #if defined(HAVE_ZLIB) size_t gzread_int32(int *destination, int n, gzFile instream); size_t gzread_uint32(unsigned int *destination, int n, gzFile instream); size_t gzread_int16(short *destination, int n, gzFile instream); size_t 
gzread_uint16(unsigned short *destination, int n, gzFile instream); size_t gzread_float32(float *destination, int n, gzFile instream); size_t gzread_char(char *destination, int n, gzFile instream); size_t gzread_uchar(unsigned char *destination, int n, gzFile instream); size_t gzread_double64(double *destination, int n, gzFile instream); size_t gzread_be_int32(int *destination, int n, gzFile instream); size_t gzread_be_uint32(unsigned int *destination, int n, gzFile instream); size_t gzread_be_int16(short *destination, int n, gzFile instream); size_t gzread_be_uint16(unsigned short *destination, int n, gzFile instream); size_t gzread_be_float32(float *destination, int n, gzFile instream); size_t gzread_be_char(char *destination, int n, gzFile instream); size_t gzread_be_uchar(unsigned char *destination, int n, gzFile instream); size_t gzread_be_double64(double *destination, int n, gzFile instream); #endif #endif affyio/src/init_package.c0000644000175400017540000000134613556116171016440 0ustar00biocbuildbiocbuild/***************************************************** ** ** file: init_package.c ** ** Copyright (C) 2013 B. M. Bolstad ** ** aim: Register c code routines so that they can be called in other packages. **" ** History ** May 20, 2013 - Initial version ** *****************************************************/ #include #include #include #include "read_abatch.h" #if _MSC_VER >= 1000 __declspec(dllexport) #endif static const R_CallMethodDef callMethods[] = { {"read_abatch",(DL_FUNC)&read_abatch,7}, {"read_abatch_stddev",(DL_FUNC)&read_abatch,7}, {NULL, NULL, 0} }; void R_init_affyio(DllInfo *info){ R_registerRoutines(info, NULL, callMethods, NULL, NULL); } affyio/src/read_abatch.c0000644000175400017540000056105413556116171016246 0ustar00biocbuildbiocbuild/************************************************************* ** ** It should be noted that Laurent Gautier provided the ** initial CEL file parsing code as part of the file ** read_cdf.c. 
This code served us well for the 1.0-1.2 ** releases of Bioconductor. He should be commended for his ** fine code. ** ** The following code became the default parsing code ** at the BioC 1.3 release. ** ** Also note that this code was originally part of the affy package ** before being moved to affyio ** ************************************************************/ /************************************************************* ** ** file: read_abatch.c ** ** aim: read in from 1st to nth chips of CEL data ** ** Copyright (C) 2003-2008 B. M. Bolstad ** ** Created on Jun 13, 2003 ** ** Notes: ** ** The following assumptions are made about text CEL files. ** ** 1. A CEL file has a series of sections in the order ** ** [CEL] ** [HEADER] ** [INTENSITY] ** [MASKS] ** [OUTLIERS] ** ** 2. As part of opening the file we will check that ** the first characters of the file are "[CEL]" ** ** 3. In the [HEADER] section we expect lines beginning ** 3a. Cols= ** 3b. Rows= ** 3ab.1 Cols should appear before Rows ** 3c. DatHeader= ** 3c.1 On the DatHeader line there should appear a ** string with the final characters ".1sq". We ** will assume that this is the name of the ** CDF file (trim off the ".1sq") ** ** 4. In the [INTENSITY] section there should be ** 4a. A line beginning "CellHeader=" ** 4b. After this line there should be cols*rows probe ** intensity lines. Each of these lines should have ** 4.b.1 Five tokens. ** 4.b.2 the first token is an integer to be treated as ** the x location ** 4.b.3 the second token is an integer to be treated as ** the y location ** 4.b.4 the third token is a floating point number (double) ** to be treated as the probe intensity value. ** ** 5. The [MASKS] and [OUTLIERS] sections will be treated similarly ** 5a. We look for a line beginning NumberCells= ** this will be the number of Masked or Outlier CELS ** that we will expect to see. ** 5b. 
For each line of these sections we will expect to see ** the first two items are integers indicating the ** X and Y locations of probes that should be set to NA ** if the user sets the right flags. ** ** The implementation of parsing for binary files was based upon ** file format information supplied from Affymetrix. ** ** ** History ** ** Jun 13, 2003 - Initial version ** Jun 14, 2003 - Further implementation ** Jun 15, 2003 - testing. ** Jun 16, 2003 - Extra verbosity (user controlled). ** Jun 17, 2003 - MASKS, OUTLIERS ** Added mechanism for reading the header. ** this function called ReadHeader ** this can be used to check all files same ** as first file. ** Jun 19, 2003 - Naming of columns of intensity matrix done in ** C code ** Jun 29, 2003 - remove some unnecessary cruft from tokenize ** routine ** Jun 30, 2003 - remove tokenize step from read_cel_intensities. ** aim is to gain more speed. ** Jul 1, 2003 - deal with compressed files. ** To avoid ugly pre-processor constructs code ** for gz functions is seperate from that in ** text files. ** Made BUF_SIZE 1024 ** Jul 3, 2003 - add missing pre-processor command ** Jul 4, 2003 - A function for reading PM or MM or both ** as individual matrices. ** Made all the internal functions static ** to prevent namespace pollution ** Oct 3, 2003 - fix an error in check_cel_file /check_gzcel_file ** cdffile was not being properly checked ** Oct 14, 2003 - fix a long standing memory leak. ** Oct 17, 2003 - integrate binary format reading, Make ** it possible to read in a mixture of ** binary, text and gzipped text CEL files ** Nov 16, 2003 - More work on binary file support (in particular ** work on endian issues for no ia32 platforms) ** Nov 20, 2003 - Fix endian bug in readfloat32. Clean up the ** some of the documentation ** Dec 2, 2003 - Fix up fopen commands (hopefully) for a problem ** on w32 machines with text files. ** May 7, 2004 - make strncmp strncasecmp. No more problems ** with cdf name capitalization. 
 ** May 11, 2004 - fix error message in case when zlib not available
 **                and a gzipped file has been supplied.
 ** Aug 16, 2005 - fix bit flipping when more than one number read
 ** Nov 15, 2005 - ability to read in SD and npixels values
 ** Nov 30, 2005 - remove compress argument from functions where it
 **                appears. it is legacy and has not been used in
 **                a great deal of time.
 ** Dec 14, 2005 - Added ReadHeaderDetailed: which attempts to
 **                be more complete than ReadHeader in what it returns
 **                from the header information basically the [CEL] and [HEADER]
 **                sections in the text files and similar information contained in
 **                the first section of the binary CEL file format
 ** Jan 27, 2006 - Complete ReadHeaderDetailed for supported formats.
 **                Add in a C level object for storing contents of a single
 **                CEL file
 ** May 31, 2006 - Fix some compiler warnings
 ** Jul 17, 2006 - Fix application of masks and outliers for binary cel files.
 ** Jul 21, 2006 - Binary parser checks for file truncation
 ** Aug 11, 2006 - Additional truncation checks for text files
 ** Aug 12, 2006 - Build the R construct that holds the CEL file
 ** Nov 3, 2006 - add gzipped binary CEL file support
 ** Apr 19, 2007 - Deal appropriately with non square CEL files (in binary format, the affymetrix documentation is inconsistent with the reality)
 ** May 13, 2007 - small fix for gzclose situation
 ** Aug 10, 2007 - fix for dangling open files each time a file is checked for binary format (Simon de Bernard, AltraBio)
 ** Aug 25, 2007 - Move file reading functions to centralized location
 ** Sep 6, 2007 - add support for generic (aka command console) format cel files
 ** Sep 7, 2007 - add support for gzipped generic (aka command console) format cel files
 ** Oct 28, 2007 - add pthread based multi-threaded read_probematrix this is based on a submission by Paul Gordon (U Calgary)
 ** Feb 18, 2008 - R_read_cel_file now can be told to read only the mean intensities (rather than also the SD and npixels)
 ** Mar 6, 2008 - Add
additional CEL file corruption checking. ** Oct 16, 2008 - Fix issue with stack exhaustion ** Oct 28, 2008 - Increase stack space allocated (prevents a crash) ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues ** Jun 3, 2009 - CEL corruption not detected in read.probematrix ** Nov 10, 2009 - Pthread on solaris fix ** May 26, 2010 - Multichannel CEL file support initiated ** Sept 18, 2013 - improve 64bit support for read_abatch ** Jun 22, 2016 - Define PTHREAD_STACK_MIN if missing (e.g. Intel compiler) (DCT) ** Sept 4, 2017 - change gzFile* to gzFile ** *************************************************************/ #include #include #include #include #include "stdlib.h" #include "stdio.h" #include "fread_functions.h" #include "read_multichannel_celfile_generic.h" #include "read_celfile_generic.h" #include "read_abatch.h" #define HAVE_ZLIB 1 #if defined(HAVE_ZLIB) #include #endif #if USE_PTHREADS #include #include #include // Intel Compiler doesn't have PTHREAD_STACK_MIN in limits.h //Set to 16K - (Linux standard for x86 / x86_64 (4 x 4K pages) #ifndef PTHREAD_STACK_MIN #define PTHREAD_STACK_MIN 16384 #endif pthread_mutex_t mutex_R; int n_probesets; int *n_probes = NULL; double **cur_indexes = NULL; struct thread_data{ SEXP filenames; double *CurintensityMatrix; double *pmMatrix; double *mmMatrix; int i; int t; int chunk_size; int ref_dim_1; int ref_dim_2; int n_files; int num_probes; SEXP cdfInfo; const char *refCdfName; int which_flag; SEXP verbose; }; #define THREADS_ENV_VAR "R_THREADS" #endif #define BUF_SIZE 1024 /****************************************************************** ** ** A "C" level object designed to hold information for a ** single CEL file ** ** These should be created using the function ** ** read_cel_file() ** ** ** *****************************************************************/ typedef struct{ detailed_header_info header; int multichannel; char **channelnames; /** these are for storing the intensities, the sds and the number of pixels **/ 
  double **intensities;
  double **stddev;
  double **npixels;

  /** these are for storing information in the masks and outliers section **/

  int *nmasks;
  int *noutliers;

  short **masks_x, **masks_y;
  short **outliers_x, **outliers_y;

} CEL;



/****************************************************************
 ****************************************************************
 **
 ** Code for splitting strings into tokens.
 ** Not heavily used anymore
 **
 ***************************************************************
 ***************************************************************/

/***************************************************************
 **
 ** tokenset
 **
 ** char **tokens  - an array of token strings
 ** int n - number of tokens in this set.
 **
 ** a structure to hold a set of tokens. Typically a tokenset is
 ** created by breaking a character string based upon a set of
 ** delimiters.
 **
 **
 **************************************************************/

typedef struct{
  char **tokens;
  int n;
} tokenset;



/******************************************************************
 **
 ** tokenset *tokenize(char *str, char *delimiters)
 **
 ** char *str - a string to break into tokens
 ** char *delimiters - delimiters to use in breaking up the line
 **
 **
 ** RETURNS a new tokenset
 **
 ** Given a string, split into tokens based on a set of delimiters.
 **
 ** The input string is handed to strtok()/strtok_r(), so it is
 ** modified in place. Every token is copied into freshly allocated
 ** storage; release the result with delete_tokens().
 **
 *****************************************************************/

static tokenset *tokenize(char *str, char *delimiters){

#if USE_PTHREADS
  /* saved position for the re-entrant strtok_r() */
  char *tmp_pointer;
#endif
  int i=0;

  char *current_token;
  tokenset *my_tokenset = Calloc(1,tokenset);
  my_tokenset->n=0;

  my_tokenset->tokens = NULL;
#if USE_PTHREADS
  current_token = strtok_r(str,delimiters,&tmp_pointer);
#else
  current_token = strtok(str,delimiters);
#endif
  while (current_token != NULL){
    /* grow the pointer array by one slot, then store a private copy of the token */
    my_tokenset->n++;
    my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*);
    my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char);
    strcpy(my_tokenset->tokens[i],current_token);
    my_tokenset->tokens[i][(strlen(current_token))] = '\0';
    i++;
#if USE_PTHREADS
    current_token = strtok_r(NULL,delimiters,&tmp_pointer);
#else
    current_token = strtok(NULL,delimiters);
#endif
  }

  return my_tokenset;
}


/******************************************************************
 **
 ** int tokenset_size(tokenset *x)
 **
 ** tokenset *x - a tokenset
 **
 ** RETURNS the number of tokens in the tokenset
 **
 ******************************************************************/

static int tokenset_size(tokenset *x){
  return x->n;
}


/******************************************************************
 **
 ** char *get_token(tokenset *x, int i)
 **
 ** tokenset *x - a tokenset
 ** int i - index of the token to return
 **
 ** RETURNS pointer to the i'th token
 **
 ** No range check is made on i; callers must keep it below
 ** tokenset_size(x).
 **
 ******************************************************************/

static char *get_token(tokenset *x,int i){
  return x->tokens[i];
}

/******************************************************************
 **
 ** void delete_tokens(tokenset *x)
 **
 ** tokenset *x - a tokenset
 **
 ** Deallocates all the space allocated for a tokenset
 ** (each token string, the pointer array and the tokenset itself)
 **
 ******************************************************************/

static void delete_tokens(tokenset *x){

  int i;

  for (i=0; i < x->n; i++){
    Free(x->tokens[i]);
  }
  Free(x->tokens);
  Free(x);
}

/*******************************************************************
 **
 ** int token_ends_with(char *token, char *ends)
 **
 ** char *token  -  a string to check
 ** char *ends_in   - we are looking for this string at the end of token
 **
 **
 ** returns  0 if no match, otherwise it returns the index of the first character
 ** which matches the start of *ends.
 **
 ** Note that there must be one additional character in "token" beyond
 ** the characters in "ends". So
 **
 **  *token = "TestStr"
 **  *ends = "TestStr"
 **
 ** would return 0 but if
 **
 ** ends = "estStr"
 **
 ** we would return 1.
** ** and if ** ** ends= "stStr" ** we would return 2 .....etc ** ** ******************************************************************/ static int token_ends_with(char *token, char *ends_in){ int tokenlength = strlen(token); int ends_length = strlen(ends_in); int start_pos; char *tmp_ptr; if (tokenlength <= ends_length){ /* token string is too short so can't possibly end with ends */ return 0; } start_pos = tokenlength - ends_length; tmp_ptr = &token[start_pos]; if (strcmp(tmp_ptr,ends_in)==0){ return start_pos; } else { return 0; } } /**************************************************************** **************************************************************** ** ** Code for dealing with text CEL files. ** *************************************************************** ***************************************************************/ /**************************************************************** ** ** void ReadFileLine(char *buffer, int buffersize, FILE *currentFile) ** ** char *buffer - place to store contents of the line ** int buffersize - size of the buffer ** FILE *currentFile - FILE pointer to an opened CEL file. ** ** Read a line from a file, into a buffer of specified size. ** otherwise die. ** ***************************************************************/ static void ReadFileLine(char *buffer, int buffersize, FILE *currentFile){ if (fgets(buffer, buffersize, currentFile) == NULL){ error("End of file reached unexpectedly. 
Perhaps this file is truncated.\n"); } } /**************************************************************** ** ** FILE *open_cel_file(const char *filename) ** ** const char *filename - name of file to open ** ** ** RETURNS a file pointer to the open file ** ** this file will open the named file and check to see that the ** first characters agree with "[CEL]" ** ***************************************************************/ static FILE *open_cel_file(const char *filename){ const char *mode = "r"; FILE *currentFile = NULL; char buffer[BUF_SIZE]; currentFile = fopen(filename,mode); if (currentFile == NULL){ error("Could not open file %s", filename); } else { /** check to see if first line is [CEL] so looks like a CEL file**/ ReadFileLine(buffer, BUF_SIZE, currentFile); if (strncmp("[CEL]", buffer, 4) == 0) { rewind(currentFile); } else { error("The file %s does not look like a CEL file",filename); } } return currentFile; } /****************************************************************** ** ** void findStartsWith(FILE *my_file,char *starts, char *buffer) ** ** FILE *my_file - an open file to read from ** char *starts - the string to search for at the start of each line ** char *buffer - where to place the line that has been read. ** ** ** Find a line that starts with the specified character string. 
** At exit buffer should contain that line ** *****************************************************************/ static void findStartsWith(FILE *my_file,char *starts, char *buffer){ int starts_len = strlen(starts); int match = 1; do { ReadFileLine(buffer, BUF_SIZE, my_file); match = strncmp(starts, buffer, starts_len); } while (match != 0); } /****************************************************************** ** ** void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer) ** ** FILE *my_file - an open file ** char *sectiontitle - string we are searching for ** char *buffer - return's with line starting with sectiontitle ** ** *****************************************************************/ static void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer){ findStartsWith(my_file,sectiontitle,buffer); } /****************************************************************** ** ** int check_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2) ** ** const char *filename - the file to read ** const char *ref_cdfName - the reference CDF filename ** int ref_dim_1 - 1st dimension of reference cel file ** int ref_dim_2 - 2nd dimension of reference cel file ** ** returns 0 if no problem, 1 otherwise ** ** The aim of this function is to read the header of the CEL file ** in particular we will look for the rows beginning "Cols=" and "Rows=" ** and then for the line DatHeader= to scope out the appropriate cdf ** file. An error() will be flagged if the appropriate conditions ** are not met. 
** ** ******************************************************************/ static int check_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2){ int i; int dim1,dim2; FILE *currentFile; char buffer[BUF_SIZE]; tokenset *cur_tokenset; currentFile = open_cel_file(filename); AdvanceToSection(currentFile,"[HEADER]",buffer); findStartsWith(currentFile,"Cols",buffer); cur_tokenset = tokenize(buffer,"="); dim1 = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"Rows",buffer); cur_tokenset = tokenize(buffer,"="); dim2 = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); if ((dim1 != ref_dim_1) || (dim2 != ref_dim_2)){ error("Cel file %s does not seem to have the correct dimensions",filename); } findStartsWith(currentFile,"DatHeader",buffer); cur_tokenset = tokenize(buffer," "); for (i =0; i < tokenset_size(cur_tokenset);i++){ if (strncasecmp(get_token(cur_tokenset,i),ref_cdfName,strlen(ref_cdfName)) == 0){ break; } if (i == (tokenset_size(cur_tokenset) - 1)){ error("Cel file %s does not seem to be of %s type",filename,ref_cdfName); } } delete_tokens(cur_tokenset); fclose(currentFile); return 0; } /************************************************************************ ** ** int read_cel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols) ** ** const char *filename - the name of the cel file to read ** double *intensity - the intensity matrix to fill ** int chip_num - the column of the intensity matrix that we will be filling ** int rows - dimension of intensity matrix ** int cols - dimension of intensity matrix ** ** returns 0 if successful, non zero if unsuccessful ** ** This function reads from the specified file the cel intensities for that ** array and fills a column of the intensity matrix. 
** ************************************************************************/ static int read_cel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ #if USE_PTHREADS char *tmp_pointer; #endif /* int */ size_t i, cur_index; int cur_x, cur_y; double cur_mean; FILE *currentFile; char buffer[BUF_SIZE]; /* tokenset *cur_tokenset;*/ char *current_token; currentFile = open_cel_file(filename); AdvanceToSection(currentFile,"[INTENSITY]",buffer); findStartsWith(currentFile,"CellHeader=",buffer); for (i=0; i < rows; i++){ ReadFileLine(buffer, BUF_SIZE, currentFile); /* cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_mean = atof(get_token(cur_tokenset,2)); */ if (strlen(buffer) <=2){ Rprintf("Warning: found an empty line where not expected in %s.\nThis means that there is a cel intensity missing from the cel file.\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, i); break; } #if USE_PTHREADS current_token = strtok_r(buffer," \t",&tmp_pointer); #else current_token = strtok(buffer," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_x = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. 
\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_y = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } if (cur_x < 0 || cur_x >= chip_dim_rows){ error("It appears that the file %s is corrupted.",filename); return 1; } if (cur_y < 0 || cur_y >= chip_dim_rows){ error("It appears that the file %s is corrupted.",filename); return 1; } cur_mean = atof(current_token); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = cur_mean; /* delete_tokens(cur_tokenset); */ } fclose(currentFile); if (i != rows){ return 1; } return 0; } /************************************************************************ ** ** int read_cel_file_stddev(const char *filename, double *intensity, int chip_num, int rows, int cols) ** ** const char *filename - the name of the cel file to read ** double *intensity - the intensity matrix to fill ** int chip_num - the column of the intensity matrix that we will be filling ** int rows - dimension of intensity matrix ** int cols - dimension of intensity matrix ** ** returns 0 if successful, non zero if unsuccessful ** ** This function reads from the specified file the cel stddev for that ** array and fills a column of the intensity matrix. 
** ************************************************************************/ static int read_cel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ #if USE_PTHREADS char *tmp_pointer; #endif size_t i, cur_x,cur_y,cur_index; double cur_stddev; FILE *currentFile; char buffer[BUF_SIZE]; /* tokenset *cur_tokenset;*/ char *current_token; currentFile = open_cel_file(filename); AdvanceToSection(currentFile,"[INTENSITY]",buffer); findStartsWith(currentFile,"CellHeader=",buffer); for (i=0; i < rows; i++){ ReadFileLine(buffer, BUF_SIZE, currentFile); /* cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_mean = atof(get_token(cur_tokenset,2)); */ if (strlen(buffer) <=2){ Rprintf("Warning: found an empty line where not expected in %s.\n This means that there is a cel intensity missing from the cel file.\n Sucessfully read to cel intensity %d of %d expected\n", filename, i-1, i); break; } #if USE_PTHREADS current_token = strtok_r(buffer," \t",&tmp_pointer); #else current_token = strtok(buffer," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_x = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. 
\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_y = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_stddev = atof(current_token); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = cur_stddev; /* delete_tokens(cur_tokenset); */ } fclose(currentFile); if (i != rows){ return 1; } return 0; } /************************************************************************ ** ** int read_cel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols) ** ** const char *filename - the name of the cel file to read ** double *intensity - the intensity matrix to fill ** int chip_num - the column of the intensity matrix that we will be filling ** int rows - dimension of intensity matrix ** int cols - dimension of intensity matrix ** ** returns 0 if successful, non zero if unsuccessful ** ** This function reads from the specified file the cel stddev for that ** array and fills a column of the intensity matrix. 
** ************************************************************************/ static int read_cel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ #if USE_PTHREADS char *tmp_pointer; #endif size_t i, cur_x,cur_y,cur_index,cur_npixels; FILE *currentFile; char buffer[BUF_SIZE]; /* tokenset *cur_tokenset;*/ char *current_token; currentFile = open_cel_file(filename); AdvanceToSection(currentFile,"[INTENSITY]",buffer); findStartsWith(currentFile,"CellHeader=",buffer); for (i=0; i < rows; i++){ ReadFileLine(buffer, BUF_SIZE, currentFile); /* cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_mean = atof(get_token(cur_tokenset,2)); */ if (strlen(buffer) <=2){ Rprintf("Warning: found an empty line where not expected in %s.\n This means that there is a cel intensity missing from the cel file.\n Sucessfully read to cel intensity %d of %d expected\n", filename, i-1, i); break; } #if USE_PTHREADS current_token = strtok_r(buffer," \t",&tmp_pointer); #else current_token = strtok(buffer," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_x = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. 
\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_y = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. 
\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_npixels = atoi(current_token); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = (double)cur_npixels; /* delete_tokens(cur_tokenset); */ } fclose(currentFile); if (i != rows){ return 1; } return 0; } /**************************************************************** ** ** void apply_masks(const char *filename, double *intensity, int chip_num, ** int rows, int cols,int chip_dim_rows, ** int rm_mask, int rm_outliers) ** ** const char *filename - name of file to open ** double *intensity - matrix of probe intensities ** int chip_num - the index 0 ...n-1 of the chip we are dealing with ** int rows - dimension of the intensity matrix ** int cols - dimension of the intensity matrix ** int chip_dim_rows - a dimension of the chip ** int rm_mask - if true locations in the MASKS section are set NA ** int rm_outliers - if true locations in the OUTLIERS section are set NA ** ** This function sets the MASK and OUTLIER probes to NA ** ** ****************************************************************/ static void apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, int rm_mask, int rm_outliers){ size_t i; size_t numcells, cur_x, cur_y, cur_index; FILE *currentFile; char buffer[BUF_SIZE]; tokenset *cur_tokenset; if ((!rm_mask) && (!rm_outliers)){ /* no masking or outliers */ return; } currentFile = open_cel_file(filename); /* read masks section */ if (rm_mask){ AdvanceToSection(currentFile,"[MASKS]",buffer); findStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"CellHeader=",buffer); for (i =0; i < numcells; i++){ ReadFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = 
atoi(get_token(cur_tokenset,1)); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = R_NaN; delete_tokens(cur_tokenset); } } /* read outliers section */ if (rm_outliers){ AdvanceToSection(currentFile,"[OUTLIERS]",buffer); findStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"CellHeader=",buffer); for (i = 0; i < numcells; i++){ ReadFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = R_NaReal; delete_tokens(cur_tokenset); } } fclose(currentFile); } /**************************************************************** ** ** static void get_masks_outliers(const char *filename, ** int *nmasks, short **masks_x, short **masks_y, ** int *noutliers, short **outliers_x, short **outliers_y ** ** This gets the x and y coordinates stored in the masks and outliers sections ** of the cel files. 
** ****************************************************************/ static void get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y){ FILE *currentFile; char buffer[BUF_SIZE]; int numcells, cur_x, cur_y; tokenset *cur_tokenset; int i; currentFile = open_cel_file(filename); /* read masks section */ AdvanceToSection(currentFile,"[MASKS]",buffer); findStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"CellHeader=",buffer); *nmasks = numcells; *masks_x = Calloc(numcells,short); *masks_y = Calloc(numcells,short); for (i =0; i < numcells; i++){ ReadFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); (*masks_x)[i] = (short)cur_x; (*masks_y)[i] = (short)cur_y; delete_tokens(cur_tokenset); } /* read outliers section */ AdvanceToSection(currentFile,"[OUTLIERS]",buffer); findStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"CellHeader=",buffer); *noutliers = numcells; *outliers_x = Calloc(numcells,short); *outliers_y = Calloc(numcells,short); for (i = 0; i < numcells; i++){ ReadFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); /* Rprintf("%d: %d %d %d\n",i, cur_x,cur_y, numcells); */ (*outliers_x)[i] = (short)cur_x; (*outliers_y)[i] = (short)cur_y; delete_tokens(cur_tokenset); } fclose(currentFile); } /************************************************************************* ** ** char *get_header_info(const char *filename, int *dim1, int *dim2) ** ** const char *filename - file to open ** int *dim1 - 
place to store Cols
 ** int *dim2 - place to store Rows
 **
 ** returns a character string containing the CDF name.
 **
 ** gets the header information (cols, rows and cdfname)
 **
 ************************************************************************/

/* Implementation notes for get_header_info():
 *  - the returned CDF name is newly allocated; it is the DatHeader
 *    token ending in ".1sq" with that suffix removed.
 *  - the value token of the "Cols="/"Rows=" lines is only read after
 *    checking that it exists.
 *  - error() does not return, so the file handle and any live
 *    tokenset are released before each error() call.
 *  - a DatHeader line without a ".1sq" token always raises the
 *    "cdf information" error, including when it yields no tokens.
 */
static char *get_header_info(const char *filename, int *dim1, int *dim2){

  int i,endpos;
  int ntokens;
  char *cdfName = NULL;
  FILE *currentFile;
  char buffer[BUF_SIZE];
  tokenset *cur_tokenset;

  currentFile = open_cel_file(filename);

  AdvanceToSection(currentFile,"[HEADER]",buffer);

  findStartsWith(currentFile,"Cols",buffer);
  cur_tokenset = tokenize(buffer,"=");
  if (tokenset_size(cur_tokenset) < 2){
    delete_tokens(cur_tokenset);
    fclose(currentFile);
    error("It appears that the file %s is corrupted.",filename);
  }
  *dim1 = atoi(get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);

  findStartsWith(currentFile,"Rows",buffer);
  cur_tokenset = tokenize(buffer,"=");
  if (tokenset_size(cur_tokenset) < 2){
    delete_tokens(cur_tokenset);
    fclose(currentFile);
    error("It appears that the file %s is corrupted.",filename);
  }
  *dim2 = atoi(get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);

  findStartsWith(currentFile,"DatHeader",buffer);
  cur_tokenset = tokenize(buffer," ");
  ntokens = tokenset_size(cur_tokenset);
  for (i = 0; i < ntokens; i++){
    /* look for a token ending in ".1sq" */
    endpos=token_ends_with(get_token(cur_tokenset,i),".1sq");
    if(endpos > 0){
      /* Found the likely CDF name, now chop off .1sq and store it */
      cdfName= Calloc(endpos+1,char);
      strncpy(cdfName,get_token(cur_tokenset,i),endpos);
      cdfName[endpos] = '\0';
      break;
    }
  }
  delete_tokens(cur_tokenset);
  fclose(currentFile);

  if (cdfName == NULL){
    error("Cel file %s does not seem to be have cdf information",filename);
  }

  return(cdfName);
}

/*************************************************************************
 **
 ** void get_detailed_header_info(const char *filename, detailed_header_info *header_info)
 **
 ** const char *filename - file to open
 ** detailed_header_info *header_info - place to store header information
 **
 ** reads the header information from a text cdf file (ignoring some fields
 ** that are unused).
** ************************************************************************/ static void get_detailed_header_info(const char *filename, detailed_header_info *header_info){ int i,endpos; FILE *currentFile; char buffer[BUF_SIZE]; char *buffercopy; tokenset *cur_tokenset; currentFile = open_cel_file(filename); AdvanceToSection(currentFile,"[HEADER]",buffer); findStartsWith(currentFile,"Cols",buffer); cur_tokenset = tokenize(buffer,"="); header_info->cols = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"Rows",buffer); cur_tokenset = tokenize(buffer,"="); header_info->rows = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"GridCornerUL",buffer); cur_tokenset = tokenize(buffer,"= "); header_info->GridCornerULx = atoi(get_token(cur_tokenset,1)); header_info->GridCornerULy = atoi(get_token(cur_tokenset,2)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"GridCornerUR",buffer); cur_tokenset = tokenize(buffer,"= "); header_info->GridCornerURx = atoi(get_token(cur_tokenset,1)); header_info->GridCornerURy = atoi(get_token(cur_tokenset,2)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"GridCornerLR",buffer); cur_tokenset = tokenize(buffer,"= "); header_info->GridCornerLRx = atoi(get_token(cur_tokenset,1)); header_info->GridCornerLRy = atoi(get_token(cur_tokenset,2)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"GridCornerLL",buffer); cur_tokenset = tokenize(buffer,"= "); header_info->GridCornerLLx = atoi(get_token(cur_tokenset,1)); header_info->GridCornerLLy = atoi(get_token(cur_tokenset,2)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"DatHeader",buffer); /* first lets copy the entire string over */ buffercopy = Calloc(strlen(buffer)+1,char); strcpy(buffercopy,buffer); cur_tokenset = tokenize(buffercopy,"\r\n"); header_info->DatHeader = Calloc(strlen(get_token(cur_tokenset,0))-8,char); strcpy(header_info->DatHeader,(get_token(cur_tokenset,0)+10)); /* 
the +10 is to avoid the starting "DatHeader=" */ Free(buffercopy); delete_tokens(cur_tokenset); /* now pull out the actual cdfname */ cur_tokenset = tokenize(buffer," "); for (i =0; i < tokenset_size(cur_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(cur_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ header_info->cdfName= Calloc(endpos+1,char); strncpy( header_info->cdfName,get_token(cur_tokenset,i),endpos); header_info->cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(cur_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } delete_tokens(cur_tokenset); findStartsWith(currentFile,"Algorithm",buffer); cur_tokenset = tokenize(buffer,"=\r\n"); header_info->Algorithm = Calloc(strlen(get_token(cur_tokenset,1))+1,char); strcpy(header_info->Algorithm,get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(currentFile,"AlgorithmParameters",buffer); cur_tokenset = tokenize(buffer,"=\r\n"); header_info->AlgorithmParameters = Calloc(strlen(get_token(cur_tokenset,1))+1,char); strcpy(header_info->AlgorithmParameters,get_token(cur_tokenset,1)); fclose(currentFile); header_info->ScanDate = Calloc(2, char); } /*************************************************************** ** ** int isTextCelFile(const char *filename) ** ** test whether the file is a valid text cel file ** ** **************************************************************/ static int isTextCelFile(const char *filename){ const char *mode = "r"; FILE *currentFile= NULL; char buffer[BUF_SIZE]; currentFile = fopen(filename,mode); if (currentFile == NULL){ error("Could not open file %s", filename); } else { /** check to see if first line is [CEL] so looks like a CEL file**/ ReadFileLine(buffer, BUF_SIZE, currentFile); fclose(currentFile); if (strncmp("[CEL]", buffer, 4) == 0) { return 1; } } return 0; } 
/****************************************************************
 ****************************************************************
 **
 ** Code for GZ files starts here.
 **
 ***************************************************************
 ***************************************************************/

#if defined(HAVE_ZLIB)

/****************************************************************
 **
 ** void ReadgzFileLine(char *buffer, int buffersize, gzFile currentFile)
 **
 ** char *buffer - place to store contents of the line
 ** int buffersize - size of the buffer
 ** gzFile currentFile - handle of an opened gzipped CEL file.
 **
 ** Read a line from a gzipped file into a buffer of specified size.
 ** Raises an R error if gzgets() returns NULL. The handle is NOT
 ** closed in that case.
 **
 ***************************************************************/

static void ReadgzFileLine(char *buffer, int buffersize, gzFile currentFile){
  if (gzgets( currentFile,buffer, buffersize) == NULL){
    error("End of gz file reached unexpectedly. Perhaps this file is truncated.\n");
  }
}

/****************************************************************
 **
 ** gzFile open_gz_cel_file(const char *filename)
 **
 ** const char *filename - name of file to open
 **
 ** RETURNS a gzFile handle to the open file, rewound to its start
 **
 ** this function will open the named file and check that the first
 ** characters agree with "[CEL" (4 characters are compared).
 ** Raises an R error if the file cannot be opened or does not look
 ** like a CEL file.
 **
 ***************************************************************/

static gzFile open_gz_cel_file(const char *filename){

  const char *mode = "rb";
  gzFile currentFile= NULL;
  char buffer[BUF_SIZE];

  currentFile = gzopen(filename,mode);
  if (currentFile == NULL){
    error("Could not open file %s", filename);
  } else {
    /** check to see if first line is [CEL] so looks like a CEL file**/
    ReadgzFileLine(buffer, BUF_SIZE, currentFile);
    if (strncmp("[CEL]", buffer, 4) == 0) {
      gzrewind(currentFile);
    } else {
      /* error() does not return: close the handle before raising */
      gzclose(currentFile);
      error("The file %s does not look like a CEL file",filename);
    }
  }
  return currentFile;
}

/******************************************************************
 **
 ** void gzfindStartsWith(gzFile my_file,char *starts, char *buffer)
 **
 ** gzFile my_file - an open file to read from
 ** char *starts - the string to search for at the start of each line
 ** char *buffer - where to place the line that has been read
 **                (read with a size limit of BUF_SIZE).
 **
 ** Find a line that starts with the specified character string.
 ** At exit buffer contains that line. If no such line exists,
 ** ReadgzFileLine raises an R error at end of file.
 **
 *****************************************************************/

static void gzfindStartsWith(gzFile my_file,char *starts, char *buffer){

  int starts_len = strlen(starts);
  int match = 1;

  do {
    ReadgzFileLine(buffer, BUF_SIZE, my_file);
    match = strncmp(starts, buffer, starts_len);
  } while (match != 0);
}

/******************************************************************
 **
 ** void gzAdvanceToSection(gzFile my_file,char *sectiontitle, char *buffer)
 **
 ** gzFile my_file - an open file
 ** char *sectiontitle - string we are searching for
 ** char *buffer - returns with line starting with sectiontitle
 **
 ** Thin wrapper around gzfindStartsWith.
 **
 *****************************************************************/

static void gzAdvanceToSection(gzFile my_file,char *sectiontitle, char *buffer){
  gzfindStartsWith(my_file,sectiontitle,buffer);
}

/******************************************************************
 **
 ** int check_gzcel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2)
 **
 ** const char *filename - the file to read
 ** const char *ref_cdfName - the reference CDF filename
 ** int ref_dim_1 - 1st dimension of reference cel file (compared with Cols)
 ** int ref_dim_2 - 2nd dimension of reference cel file (compared with Rows)
 **
 ** returns 0 if no problem; every problem is reported through error()
 **
 ** The aim of this function is to read the header of the CEL file
 ** in particular we will look for the rows beginning "Cols=" and "Rows="
 ** and then for the line DatHeader= to scope out the appropriate cdf
 ** file. An error() will be flagged if the dimensions differ from the
 ** reference or if no DatHeader token starts (case-insensitively) with
 ** ref_cdfName. The file and tokens are released before error() is raised.
 **
 ******************************************************************/

static int check_gzcel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2){

  int i;
  int dim1,dim2;
  int cdf_matches = 0;
  size_t ref_len = strlen(ref_cdfName);

  gzFile currentFile;
  char buffer[BUF_SIZE];
  tokenset *cur_tokenset;

  currentFile = open_gz_cel_file(filename);

  gzAdvanceToSection(currentFile,"[HEADER]",buffer);

  gzfindStartsWith(currentFile,"Cols",buffer);
  cur_tokenset = tokenize(buffer,"=");
  dim1 = atoi(get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"Rows",buffer);
  cur_tokenset = tokenize(buffer,"=");
  dim2 = atoi(get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);

  if ((dim1 != ref_dim_1) || (dim2 != ref_dim_2)){
    gzclose(currentFile);
    error("Cel file %s does not seem to have the correct dimensions",filename);
  }

  gzfindStartsWith(currentFile,"DatHeader",buffer);
  cur_tokenset = tokenize(buffer," ");
  for (i =0; i < tokenset_size(cur_tokenset);i++){
    if (strncasecmp(get_token(cur_tokenset,i),ref_cdfName,ref_len) == 0){
      cdf_matches = 1;
      break;
    }
  }
  delete_tokens(cur_tokenset);
  gzclose(currentFile);

  if (!cdf_matches){
    error("Cel file %s does not seem to be of %s type",filename,ref_cdfName);
  }

  return 0;
}

/************************************************************************
 **
 ** int read_gzcel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols)
 **
 ** const char *filename - the name of the cel file to read
 ** double *intensity - the intensity matrix to fill
 ** int chip_num - the column of the intensity matrix that we will be filling
 ** int rows - dimension of intensity matrix
 ** int cols - dimension of intensity matrix
 **
 ** returns 0 if successful, non zero if unsuccessful
 **
 ** This function reads from the specified file the cel intensities for that
 ** array and fills a column of the intensity matrix.
** ************************************************************************/ static int read_gzcel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ #if USE_PTHREADS char *tmp_pointer; #endif size_t i, cur_index; int cur_x, cur_y; double cur_mean; gzFile currentFile; char buffer[BUF_SIZE]; /* tokenset *cur_tokenset;*/ char *current_token; currentFile = open_gz_cel_file(filename); gzAdvanceToSection(currentFile,"[INTENSITY]",buffer); gzfindStartsWith(currentFile,"CellHeader=",buffer); for (i=0; i < rows; i++){ ReadgzFileLine(buffer, BUF_SIZE, currentFile); /* cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_mean = atof(get_token(cur_tokenset,2)); */ #if USE_PTHREADS current_token = strtok_r(buffer," \t",&tmp_pointer); #else current_token = strtok(buffer," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_x = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_y = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. 
\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } if (cur_x < 0 || cur_x >= chip_dim_rows){ error("It appears that the file %s is corrupted.",filename); return 1; } if (cur_y < 0 || cur_y >= chip_dim_rows){ error("It appears that the file %s is corrupted.",filename); return 1; } cur_mean = atof(current_token); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = cur_mean; /* delete_tokens(cur_tokenset); */ } gzclose(currentFile); if (i != rows){ return 1; } return 0; } /************************************************************************ ** ** int read_gzcel_file_stddev(const char *filename, double *intensity, int chip_num, int rows, int cols) ** ** const char *filename - the name of the cel file to read ** double *intensity - the intensity matrix to fill ** int chip_num - the column of the intensity matrix that we will be filling ** int rows - dimension of intensity matrix ** int cols - dimension of intensity matrix ** ** returns 0 if successful, non zero if unsuccessful ** ** This function reads from the specified file the cel intensities for that ** array and fills a column of the intensity matrix. 
** ************************************************************************/ static int read_gzcel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ #if USE_PTHREADS char *tmp_pointer; #endif size_t i, cur_x,cur_y,cur_index; double cur_stddev; gzFile currentFile; char buffer[BUF_SIZE]; /* tokenset *cur_tokenset;*/ char *current_token; currentFile = open_gz_cel_file(filename); gzAdvanceToSection(currentFile,"[INTENSITY]",buffer); gzfindStartsWith(currentFile,"CellHeader=",buffer); for (i=0; i < rows; i++){ ReadgzFileLine(buffer, BUF_SIZE, currentFile); /* cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_mean = atof(get_token(cur_tokenset,2)); */ #if USE_PTHREADS current_token = strtok_r(buffer," \t",&tmp_pointer); #else current_token = strtok(buffer," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_x = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_y = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. 
\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_stddev = atof(current_token); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = cur_stddev; /* delete_tokens(cur_tokenset); */ } gzclose(currentFile); if (i != rows){ return 1; } return 0; } /************************************************************************ ** ** int read_gzcel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols) ** ** const char *filename - the name of the cel file to read ** double *intensity - the intensity matrix to fill ** int chip_num - the column of the intensity matrix that we will be filling ** int rows - dimension of intensity matrix ** int cols - dimension of intensity matrix ** ** returns 0 if successful, non zero if unsuccessful ** ** This function reads from the specified file the cel npixels for that ** array and fills a column of the intensity matrix. 
** ************************************************************************/ static int read_gzcel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ #if USE_PTHREADS char *tmp_pointer; #endif size_t i, cur_x,cur_y,cur_index,cur_npixels; gzFile currentFile; char buffer[BUF_SIZE]; /* tokenset *cur_tokenset;*/ char *current_token; currentFile = open_gz_cel_file(filename); gzAdvanceToSection(currentFile,"[INTENSITY]",buffer); gzfindStartsWith(currentFile,"CellHeader=",buffer); for (i=0; i < rows; i++){ ReadgzFileLine(buffer, BUF_SIZE, currentFile); /* cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_mean = atof(get_token(cur_tokenset,2)); */ #if USE_PTHREADS current_token = strtok_r(buffer," \t",&tmp_pointer); #else current_token = strtok(buffer," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_x = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_y = atoi(current_token); #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. 
\nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } #if USE_PTHREADS current_token = strtok_r(NULL," \t",&tmp_pointer); #else current_token = strtok(NULL," \t"); #endif if (current_token == NULL){ Rprintf("Warning: found an incomplete line where not expected in %s.\nThe CEL file may be truncated. \nSucessfully read to cel intensity %d of %d expected\n", filename, i-1, rows); break; } cur_npixels = atoi(current_token); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = (double)cur_npixels; /* delete_tokens(cur_tokenset); */ } gzclose(currentFile); if (i != rows){ return 1; } return 0; } /**************************************************************** ** ** void gz_apply_masks(const char *filename, double *intensity, int chip_num, ** int rows, int cols,int chip_dim_rows, ** int rm_mask, int rm_outliers) ** ** const char *filename - name of file to open ** double *intensity - matrix of probe intensities ** int chip_num - the index 0 ...n-1 of the chip we are dealing with ** int rows - dimension of the intensity matrix ** int cols - dimension of the intensity matrix ** int chip_dim_rows - a dimension of the chip ** int rm_mask - if true locations in the MASKS section are set NA ** int rm_outliers - if true locations in the OUTLIERS section are set NA ** ** This function sets the MASK and OUTLIER probes to NA ** ** ****************************************************************/ static void gz_apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, int rm_mask, int rm_outliers){ size_t i; size_t numcells, cur_x, 
cur_y, cur_index; gzFile currentFile; char buffer[BUF_SIZE]; tokenset *cur_tokenset; if ((!rm_mask) && (!rm_outliers)){ /* no masking or outliers */ return; } currentFile = open_gz_cel_file(filename); /* read masks section */ if (rm_mask){ gzAdvanceToSection(currentFile,"[MASKS]",buffer); gzfindStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); gzfindStartsWith(currentFile,"CellHeader=",buffer); for (i =0; i < numcells; i++){ ReadgzFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = R_NaN; delete_tokens(cur_tokenset); } } /* read outliers section */ if (rm_outliers){ gzAdvanceToSection(currentFile,"[OUTLIERS]",buffer); gzfindStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); gzfindStartsWith(currentFile,"CellHeader=",buffer); for (i = 0; i < numcells; i++){ ReadgzFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); cur_index = cur_x + chip_dim_rows*(cur_y); intensity[chip_num*rows + cur_index] = R_NaReal; delete_tokens(cur_tokenset); } } gzclose(currentFile); } /************************************************************************* ** ** char *gz_get_header_info(const char *filename, int *dim1, int *dim2) ** ** const char *filename - file to open ** int *dim1 - place to store Cols ** int *dim2 - place to store Rows ** ** returns a character string containing the CDF name. 
** ** gets the header information (cols, rows and cdfname) ** ************************************************************************/ static char *gz_get_header_info(const char *filename, int *dim1, int *dim2){ int i,endpos; char *cdfName = NULL; gzFile currentFile; char buffer[BUF_SIZE]; tokenset *cur_tokenset; currentFile = open_gz_cel_file(filename); gzAdvanceToSection(currentFile,"[HEADER]",buffer); gzfindStartsWith(currentFile,"Cols",buffer); cur_tokenset = tokenize(buffer,"="); *dim1 = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); gzfindStartsWith(currentFile,"Rows",buffer); cur_tokenset = tokenize(buffer,"="); *dim2 = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); gzfindStartsWith(currentFile,"DatHeader",buffer); cur_tokenset = tokenize(buffer," "); for (i =0; i < tokenset_size(cur_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(cur_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ cdfName= Calloc(endpos+1,char); strncpy(cdfName,get_token(cur_tokenset,i),endpos); cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(cur_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } delete_tokens(cur_tokenset); gzclose(currentFile); return(cdfName); } /************************************************************************* ** ** char *gz_get_detailed_header_info(const char *filename, detailed_header_info *header_info) ** ** const char *filename - file to open ** detailed_header_info *header_info - place to store header information ** ** reads the header information from a gzipped text cdf file (ignoring some fields ** that are unused). 
** ************************************************************************/

/*
** Stores in *header_info: cols, rows, the four grid corners, the DatHeader
** string (without its leading "DatHeader="), the CDF name (the DatHeader
** token ending in ".1sq", with that suffix removed), Algorithm and
** AlgorithmParameters. ScanDate is set to a zero-filled two byte buffer.
**
** Every string stored in header_info is allocated with Calloc.
** Raises an R error if the DatHeader line holds no token ending in ".1sq".
*/
static void gz_get_detailed_header_info(const char *filename, detailed_header_info *header_info){

  int i,endpos;
  int found_cdf = 0;
  size_t datheader_len;
  const char *datheader_line;
  gzFile currentFile;
  char buffer[BUF_SIZE];
  char *buffercopy;
  tokenset *cur_tokenset;

  currentFile = open_gz_cel_file(filename);

  gzAdvanceToSection(currentFile,"[HEADER]",buffer);

  gzfindStartsWith(currentFile,"Cols",buffer);
  cur_tokenset = tokenize(buffer,"=");
  header_info->cols = atoi(get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"Rows",buffer);
  cur_tokenset = tokenize(buffer,"=");
  header_info->rows = atoi(get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"GridCornerUL",buffer);
  cur_tokenset = tokenize(buffer,"= ");
  header_info->GridCornerULx = atoi(get_token(cur_tokenset,1));
  header_info->GridCornerULy = atoi(get_token(cur_tokenset,2));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"GridCornerUR",buffer);
  cur_tokenset = tokenize(buffer,"= ");
  header_info->GridCornerURx = atoi(get_token(cur_tokenset,1));
  header_info->GridCornerURy = atoi(get_token(cur_tokenset,2));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"GridCornerLR",buffer);
  cur_tokenset = tokenize(buffer,"= ");
  header_info->GridCornerLRx = atoi(get_token(cur_tokenset,1));
  header_info->GridCornerLRy = atoi(get_token(cur_tokenset,2));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"GridCornerLL",buffer);
  cur_tokenset = tokenize(buffer,"= ");
  header_info->GridCornerLLx = atoi(get_token(cur_tokenset,1));
  header_info->GridCornerLLy = atoi(get_token(cur_tokenset,2));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"DatHeader",buffer);
  /* first lets copy the entire string over (the buffer itself is tokenized
     again below to find the CDF name) */
  buffercopy = Calloc(strlen(buffer)+1,char);
  strcpy(buffercopy,buffer);
  cur_tokenset = tokenize(buffercopy,"\r\n");
  datheader_line = get_token(cur_tokenset,0);
  datheader_len = strlen(datheader_line);
  if (datheader_len >= 10){
    /* the +10 is to avoid the starting "DatHeader=" */
    header_info->DatHeader = Calloc(datheader_len-8,char);
    strcpy(header_info->DatHeader,datheader_line+10);
  } else {
    /* line too short to hold anything after "DatHeader=": store "" rather
       than reading past the end of the token */
    header_info->DatHeader = Calloc(1,char);
  }
  Free(buffercopy);
  delete_tokens(cur_tokenset);

  /* now pull out the actual cdfname */
  cur_tokenset = tokenize(buffer," ");
  for (i =0; i < tokenset_size(cur_tokenset);i++){
    /* look for a token ending in ".1sq" */
    endpos=token_ends_with(get_token(cur_tokenset,i),".1sq");
    if(endpos > 0){
      /* Found the likely CDF name, now chop off .1sq and store it */
      header_info->cdfName= Calloc(endpos+1,char);
      strncpy(header_info->cdfName,get_token(cur_tokenset,i),endpos);
      header_info->cdfName[endpos] = '\0';
      found_cdf = 1;
      break;
    }
  }
  delete_tokens(cur_tokenset);

  if (!found_cdf){
    /* error() does not return, so release what we hold first */
    Free(header_info->DatHeader);
    gzclose(currentFile);
    error("Cel file %s does not seem to be have cdf information",filename);
  }

  gzfindStartsWith(currentFile,"Algorithm",buffer);
  cur_tokenset = tokenize(buffer,"=\r\n");
  header_info->Algorithm = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
  strcpy(header_info->Algorithm,get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);

  gzfindStartsWith(currentFile,"AlgorithmParameters",buffer);
  cur_tokenset = tokenize(buffer,"=\r\n");
  header_info->AlgorithmParameters = Calloc(strlen(get_token(cur_tokenset,1))+1,char);
  strcpy(header_info->AlgorithmParameters,get_token(cur_tokenset,1));
  delete_tokens(cur_tokenset);   /* was missing: the tokenset leaked */

  gzclose(currentFile);

  header_info->ScanDate = Calloc(2, char);
}

/****************************************************************
 **
 ** static void gz_get_masks_outliers(const char *filename,
 **                         int *nmasks, short **masks_x, short **masks_y,
 **                         int *noutliers, short **outliers_x, short **outliers_y
 **
 ** This gets the x and y coordinates stored in the masks and outliers sections
 ** of the cel files.
(for gzipped text CEL files) ** ****************************************************************/ static void gz_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y){ gzFile currentFile; char buffer[BUF_SIZE]; int numcells, cur_x, cur_y; tokenset *cur_tokenset; int i; currentFile = open_gz_cel_file(filename); /* read masks section */ gzAdvanceToSection(currentFile,"[MASKS]",buffer); gzfindStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); gzfindStartsWith(currentFile,"CellHeader=",buffer); *nmasks = numcells; *masks_x = Calloc(numcells,short); *masks_y = Calloc(numcells,short); for (i =0; i < numcells; i++){ ReadgzFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); (*masks_x)[i] = (short)cur_x; (*masks_y)[i] = (short)cur_y; delete_tokens(cur_tokenset); } /* read outliers section */ gzAdvanceToSection(currentFile,"[OUTLIERS]",buffer); gzfindStartsWith(currentFile,"NumberCells=",buffer); cur_tokenset = tokenize(buffer,"="); numcells = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); gzfindStartsWith(currentFile,"CellHeader=",buffer); *noutliers = numcells; *outliers_x = Calloc(numcells,short); *outliers_y = Calloc(numcells,short); for (i = 0; i < numcells; i++){ ReadgzFileLine(buffer, BUF_SIZE, currentFile); cur_tokenset = tokenize(buffer," \t"); cur_x = atoi(get_token(cur_tokenset,0)); cur_y = atoi(get_token(cur_tokenset,1)); /* Rprintf("%d: %d %d %d\n",i, cur_x,cur_y, numcells); */ (*outliers_x)[i] = (short)cur_x; (*outliers_y)[i] = (short)cur_y; delete_tokens(cur_tokenset); } gzclose(currentFile); } #endif /*************************************************************** ** ** int isgzTextCelFile(const char *filename) ** ** test whether the file is a 
valid gzipped text cel file ** ** **************************************************************/ static int isgzTextCelFile(const char *filename){ #if defined HAVE_ZLIB const char *mode = "rb"; gzFile currentFile = NULL; char buffer[BUF_SIZE]; currentFile = gzopen(filename,mode); if (currentFile == NULL){ error("Could not open file %s", filename); } else { /** check to see if first line is [CEL] so looks like a CEL file**/ ReadgzFileLine(buffer, BUF_SIZE, currentFile); gzclose(currentFile); /* fixed by WH 28 Dec 2003 */ if (strncmp("[CEL]", buffer, 4) == 0) { return 1; } } #endif return 0; } /*************************************************************** *************************************************************** ** ** Code for manipulating the cdfInfo ** *************************************************************** ***************************************************************/ /************************************************************************* ** ** static int CountCDFProbes(SEXP cdfInfo) ** ** SEXP cdfInfo - a list of matrices, each containing matrix of PM/MM probe ** indicies ** ** returns the number of probes (PM) ** ** ** ** *************************************************************************/ static int CountCDFProbes(SEXP cdfInfo){ int i; int n_probes = 0; int n_probesets = GET_LENGTH(cdfInfo); for (i =0; i < n_probesets; i++){ n_probes +=INTEGER(getAttrib(VECTOR_ELT(cdfInfo,i),R_DimSymbol))[0]; } return n_probes; } /************************************************************************* ** ** static void storeIntensities(double *CurintensityMatrix,double *pmMatrix, ** double *mmMatrix, int curcol ,int rows,int cols, ** int chip_dim_rows,SEXP cdfInfo) ** ** double *CurintensityMatrix ** ** *************************************************************************/ static void storeIntensities(double *CurintensityMatrix, double *pmMatrix, double *mmMatrix, size_t curcol, size_t rows, size_t cols, size_t tot_n_probes, SEXP cdfInfo, int 
which){ size_t i = 0,j=0, currow=0; #ifndef USE_PTHREADS int n_probes=0; int n_probesets = GET_LENGTH(cdfInfo); double *cur_index; SEXP curIndices; #endif for (i=0; i < n_probesets; i++){ #ifdef USE_PTHREADS for (j=0; j < n_probes[i]; j++){ if (which >= 0){ pmMatrix[curcol*tot_n_probes + currow] = CurintensityMatrix[(int)cur_indexes[i][j] - 1]; } if (which <= 0){ mmMatrix[curcol*tot_n_probes + currow] = CurintensityMatrix[(int)cur_indexes[i][j+n_probes[i]] - 1]; } currow++; } #else curIndices = VECTOR_ELT(cdfInfo,i); n_probes = INTEGER(getAttrib(curIndices,R_DimSymbol))[0]; cur_index = NUMERIC_POINTER(AS_NUMERIC(curIndices)); for (j=0; j < n_probes; j++){ if (which >= 0){ pmMatrix[curcol*tot_n_probes + currow] = CurintensityMatrix[(int)cur_index[j] - 1]; } if (which <= 0){ mmMatrix[curcol*tot_n_probes + currow] = CurintensityMatrix[(int)cur_index[j+n_probes] - 1]; } currow++; } #endif } } /**************************************************************** **************************************************************** ** ** These is the code for reading binary CEL files. 
(Note ** not currently viable outside IA32) ** *************************************************************** ***************************************************************/ typedef struct{ int magic_number; int version_number; int cols; int rows; int n_cells; int header_len; char *header; int alg_len; char *algorithm; int alg_param_len; char *alg_param; int celmargin; unsigned int n_outliers; unsigned int n_masks; int n_subgrids; FILE *infile; gzFile gzinfile; } binary_header; typedef struct{ float cur_intens; float cur_sd; short npixels; } celintens_record; typedef struct{ short x; short y; } outliermask_loc; /************************************************************* ** ** int isBinaryCelFile(const char *filename) ** ** filename - Name of the prospective binary cel file ** ** Returns 1 if we find the appropriate parts of the ** header (a magic number of 64 followed by version number of ** 4) ** ** ** *************************************************************/ static int isBinaryCelFile(const char *filename){ FILE *infile; int magicnumber; int version_number; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } if (!fread_int32(&magicnumber,1,infile)){ fclose(infile); return 0; } if (!fread_int32(&version_number,1,infile)){ fclose(infile); return 0; } if (magicnumber != 64){ fclose(infile); return 0; } if (version_number != 4){ fclose(infile); return 0; } fclose(infile); return 1; } /************************************************************* ** ** static void delete_binary_header(binary_header *my_header) ** ** binary_header *my_header ** ** frees memory allocated for binary_header structure ** *************************************************************/ static void delete_binary_header(binary_header *my_header){ Free(my_header->header); Free(my_header->algorithm); Free(my_header->alg_param); Free(my_header); } /************************************************************* ** ** static 
binary_header *read_binary_header(const char *filename, int return_stream, FILE *infile) ** ** const char *filename - name of binary cel file ** int return_stream - if 1 return the stream as part of the header, otherwise close the ** file at end of function. ** *************************************************************/ static binary_header *read_binary_header(const char *filename, int return_stream){ /* , FILE *infile){ */ FILE *infile; binary_header *this_header = Calloc(1,binary_header); /* Pass through all the header information */ if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } if (!fread_int32(&(this_header->magic_number),1,infile)){ error("The binary file %s does not have the appropriate magic number\n",filename); fclose(infile); return 0; } if (this_header->magic_number != 64){ error("The binary file %s does not have the appropriate magic number\n",filename); fclose(infile); return 0; } if (!fread_int32(&(this_header->version_number),1,infile)){ fclose(infile); return 0; } if (this_header->version_number != 4){ error("The binary file %s is not version 4. Cannot read\n",filename); fclose(infile); return 0; } /*** NOTE THE DOCUMENTATION ON THE WEB IS INCONSISTENT WITH THE TRUTH IF YOU LOOK AT THE FUSION SDK */ /** DOCS - cols then rows , FUSION - rows then cols */ /** We follow FUSION here (in the past we followed the DOCS **/ if (!fread_int32(&(this_header->rows),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!fread_int32(&(this_header->cols),1,infile)){ error("Binary file corrupted? Could not read any further\n"); return 0; } if (!fread_int32(&(this_header->n_cells),1,infile)){ error("Binary file corrupted? 
Could not read any further\n"); } if (this_header->n_cells != (this_header->cols)*(this_header->rows)){ error("The number of cells does not seem to be equal to cols*rows in %s.\n",filename); } if (!fread_int32(&(this_header->header_len),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } this_header->header = Calloc(this_header->header_len+1,char); if (!fread(this_header->header,sizeof(char),this_header->header_len,infile)){ error("binary file corrupted? Could not read any further.\n"); } if (!fread_int32(&(this_header->alg_len),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } this_header->algorithm = Calloc(this_header->alg_len+1,char); if (!fread_char(this_header->algorithm,this_header->alg_len,infile)){ error("binary file corrupted? Could not read any further.\n"); } if (!fread_int32(&(this_header->alg_param_len),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } this_header->alg_param = Calloc(this_header->alg_param_len+1,char); if (!fread_char(this_header->alg_param,this_header->alg_param_len,infile)){ error("binary file corrupted? Could not read any further.\n"); } if (!fread_int32(&(this_header->celmargin),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!fread_uint32(&(this_header->n_outliers),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!fread_uint32(&(this_header->n_masks),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!fread_int32(&(this_header->n_subgrids),1,infile)){ error("Binary file corrupted? 
Could not read any further\n"); } if (!return_stream){ fclose(infile); } else { this_header->infile = infile; } return this_header; } /************************************************************* ** ** static char *binary_get_header_info(const char *filename, int *dim1, int *dim2) ** ** this function pulls out the rows, cols and cdfname ** from the header of a binary cel file ** *************************************************************/ static char *binary_get_header_info(const char *filename, int *dim1, int *dim2){ char *cdfName =0; tokenset *my_tokenset; int i = 0,endpos; binary_header *my_header; my_header = read_binary_header(filename,0); *dim1 = my_header->cols; *dim2 = my_header->rows; my_tokenset = tokenize(my_header->header," "); for (i =0; i < tokenset_size(my_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(my_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ cdfName= Calloc(endpos+1,char); strncpy(cdfName,get_token(my_tokenset,i),endpos); cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(my_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } delete_binary_header(my_header); delete_tokens(my_tokenset); return(cdfName); } /************************************************************************* ** ** void binary_get_detailed_header_info(const char *filename, detailed_header_info *header_info) ** ** const char *filename - file to open ** detailed_header_info *header_info - place to store header information ** ** reads the header information from a binary cdf file (ignoring some fields ** that are unused). 
** ************************************************************************/ static void binary_get_detailed_header_info(const char *filename, detailed_header_info *header_info){ /* char *cdfName =0; */ tokenset *my_tokenset; tokenset *temp_tokenset; char *header_copy; char *tmpbuffer; int i = 0,endpos; binary_header *my_header; my_header = read_binary_header(filename,0); header_info->cols = my_header->cols; header_info->rows = my_header->rows; header_info->Algorithm = Calloc(strlen(my_header->algorithm)+1,char); strcpy(header_info->Algorithm,my_header->algorithm); header_info->AlgorithmParameters = Calloc(strlen(my_header->alg_param)+1,char); strncpy(header_info->AlgorithmParameters,my_header->alg_param,strlen(my_header->alg_param)-1); /* Rprintf("%s\n\n\n",my_header->header); */ header_copy = Calloc(strlen(my_header->header) +1,char); strcpy(header_copy,my_header->header); my_tokenset = tokenize(header_copy,"\n"); /** Looking for GridCornerUL, GridCornerUR, GridCornerLR, GridCornerLL and DatHeader */ for (i =0; i < tokenset_size(my_tokenset);i++){ /* Rprintf("%d: %s\n",i,get_token(my_tokenset,i)); */ if (strncmp("GridCornerUL",get_token(my_tokenset,i),12) == 0){ tmpbuffer = Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerULx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerULy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("GridCornerUR",get_token(my_tokenset,i),12) == 0){ tmpbuffer = Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerURx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerURy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("GridCornerLR",get_token(my_tokenset,i),12) == 0){ tmpbuffer = 
Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerLRx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerLRy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("GridCornerLL",get_token(my_tokenset,i),12) == 0){ tmpbuffer = Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerLLx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerLLy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("DatHeader",get_token(my_tokenset,i),9) == 0){ header_info->DatHeader = Calloc(strlen(get_token(my_tokenset,i))+1, char); strcpy(header_info->DatHeader,(get_token(my_tokenset,i)+10)); } } delete_tokens(my_tokenset); Free(header_copy); header_copy = Calloc(my_header->header_len +1,char); strcpy(header_copy,my_header->header); my_tokenset = tokenize(header_copy," "); for (i =0; i < tokenset_size(my_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(my_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ header_info->cdfName= Calloc(endpos+1,char); strncpy(header_info->cdfName,get_token(my_tokenset,i),endpos); header_info->cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(my_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } header_info->ScanDate = Calloc(2, char); delete_tokens(my_tokenset); delete_binary_header(my_header); Free(header_copy); } /*************************************************************** ** ** static int check_binary_cel_file(const char *filename, char *ref_cdfName, int ref_dim_1, int ref_dim_2) ** ** This function checks a binary cel file to see if it has the ** expected rows, cols and cdfname ** 
**************************************************************/ static int check_binary_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2){ char *cdfName =0; tokenset *my_tokenset; int i = 0,endpos; binary_header *my_header; my_header = read_binary_header(filename,0); if ((my_header->cols != ref_dim_1) || (my_header->rows != ref_dim_2)){ error("Cel file %s does not seem to have the correct dimensions",filename); } my_tokenset = tokenize(my_header->header," "); for (i =0; i < tokenset_size(my_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(my_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ cdfName= Calloc(endpos+1,char); strncpy(cdfName,get_token(my_tokenset,i),endpos); cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(my_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } if (strncasecmp(cdfName,ref_cdfName,strlen(ref_cdfName)) != 0){ error("Cel file %s does not seem to be of %s type",filename,ref_cdfName); } delete_binary_header(my_header); delete_tokens(my_tokenset); Free(cdfName); return 0; } /*************************************************************** ** ** static int read_binarycel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads binary cel file intensities into the data matrix ** **************************************************************/ static int read_binarycel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0, j=0; size_t cur_index; int fread_err=0; celintens_record *cur_intensity = Calloc(1,celintens_record); binary_header *my_header; my_header = read_binary_header(filename,1); for (i = 0; i < my_header->rows; i++){ for (j =0; j < my_header->cols; j++){ cur_index = j + my_header->cols*i; /* i + 
my_header->rows*j; */ fread_err = fread_float32(&(cur_intensity->cur_intens),1,my_header->infile); fread_err+= fread_float32(&(cur_intensity->cur_sd),1,my_header->infile); fread_err+=fread_int16(&(cur_intensity->npixels),1,my_header->infile); if (fread_err < 3){ fclose(my_header->infile); delete_binary_header(my_header); Free(cur_intensity); return 1; } if (cur_intensity->cur_intens < 0 || cur_intensity->cur_intens > 65536 || isnan(cur_intensity->cur_intens)){ fclose(my_header->infile); delete_binary_header(my_header); Free(cur_intensity); return 1; } fread_err=0; intensity[chip_num*my_header->n_cells + cur_index] = (double )cur_intensity->cur_intens; } } fclose(my_header->infile); delete_binary_header(my_header); Free(cur_intensity); return(0); } /*************************************************************** ** ** static int read_binarycel_file_stdev(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads binary cel file stddev values into the data matrix ** **************************************************************/ static int read_binarycel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0, j=0; size_t cur_index; int fread_err=0; celintens_record *cur_intensity = Calloc(1,celintens_record); binary_header *my_header; my_header = read_binary_header(filename,1); for (i = 0; i < my_header->rows; i++){ for (j =0; j < my_header->cols; j++){ cur_index = j + my_header->cols*i; /* i + my_header->rows*j; */ fread_err = fread_float32(&(cur_intensity->cur_intens),1,my_header->infile); fread_err+= fread_float32(&(cur_intensity->cur_sd),1,my_header->infile); fread_err+= fread_int16(&(cur_intensity->npixels),1,my_header->infile); if (fread_err < 3){ fclose(my_header->infile); delete_binary_header(my_header); Free(cur_intensity); return 1; } fread_err=0; intensity[chip_num*my_header->n_cells + cur_index] = (double 
)cur_intensity->cur_sd; } } fclose(my_header->infile); delete_binary_header(my_header); Free(cur_intensity); return(0); } /*************************************************************** ** ** static int read_binarycel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads binary cel file npixels values into the data matrix ** **************************************************************/ static int read_binarycel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0, j=0; size_t cur_index; int fread_err=0; celintens_record *cur_intensity = Calloc(1,celintens_record); binary_header *my_header; my_header = read_binary_header(filename,1); for (i = 0; i < my_header->rows; i++){ for (j =0; j < my_header->cols; j++){ cur_index = j + my_header->cols*i; /* i + my_header->rows*j; */ fread_err = fread_float32(&(cur_intensity->cur_intens),1,my_header->infile); fread_err+= fread_float32(&(cur_intensity->cur_sd),1,my_header->infile); fread_err+= fread_int16(&(cur_intensity->npixels),1,my_header->infile); if (fread_err < 3){ fclose(my_header->infile); delete_binary_header(my_header); Free(cur_intensity); return 1; } fread_err=0; intensity[chip_num*my_header->n_cells + cur_index] = (double )cur_intensity->npixels; } } fclose(my_header->infile); delete_binary_header(my_header); Free(cur_intensity); return(0); } /*************************************************************** ** ** static void binary_apply_masks(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers) ** ** ** **************************************************************/ static void binary_apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, int rm_mask, int rm_outliers){ size_t i=0; size_t cur_index; int sizeofrecords; 
outliermask_loc *cur_loc= Calloc(1,outliermask_loc); binary_header *my_header; my_header = read_binary_header(filename,1); sizeofrecords = 2*sizeof(float) + sizeof(short); /* sizeof(celintens_record) */ fseek(my_header->infile,my_header->n_cells*sizeofrecords,SEEK_CUR); if (rm_mask){ for (i =0; i < my_header->n_masks; i++){ fread_int16(&(cur_loc->x),1,my_header->infile); fread_int16(&(cur_loc->y),1,my_header->infile); cur_index = (int)cur_loc->x + my_header->rows*(int)cur_loc->y; /* cur_index = (int)cur_loc->y + my_header->rows*(int)cur_loc->x; */ /* intensity[chip_num*my_header->rows + cur_index] = R_NaN; */ intensity[chip_num*rows + cur_index] = R_NaN; } } else { fseek(my_header->infile,my_header->n_masks*sizeof(cur_loc),SEEK_CUR); } if (rm_outliers){ for (i =0; i < my_header->n_outliers; i++){ fread_int16(&(cur_loc->x),1,my_header->infile); fread_int16(&(cur_loc->y),1,my_header->infile); cur_index = (int)cur_loc->x + my_header->rows*(int)cur_loc->y; /* intensity[chip_num*my_header->n_cells + cur_index] = R_NaN; */ intensity[chip_num*rows + cur_index] = R_NaN; } } else { fseek(my_header->infile,my_header->n_outliers*sizeof(cur_loc),SEEK_CUR); } fclose(my_header->infile); delete_binary_header(my_header); Free(cur_loc); } /**************************************************************** ** ** static void binary_get_masks_outliers(const char *filename, ** int *nmasks, short **masks_x, short **masks_y, ** int *noutliers, short **outliers_x, short **outliers_y ** ** This gets the x and y coordinates stored in the masks and outliers sections ** of the cel files. 
(for binary CEL files)
 **
 ** The four coordinate arrays are allocated here with Calloc and handed
 ** back through the pointer arguments together with their lengths.
 **
 ****************************************************************/

static void binary_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y){

  int k;
  int record_bytes;
  short *xs;
  short *ys;
  outliermask_loc *loc = Calloc(1,outliermask_loc);
  binary_header *hdr;

  hdr = read_binary_header(filename,1);

  /* Move past the per-cell records to reach the mask section. */
  record_bytes = 2*sizeof(float) + sizeof(short);
  fseek(hdr->infile,hdr->n_cells*record_bytes,SEEK_CUR);

  /* Mask section */
  *nmasks = hdr->n_masks;
  xs = Calloc(hdr->n_masks,short);
  *masks_x = xs;
  ys = Calloc(hdr->n_masks,short);
  *masks_y = ys;

  k = 0;
  while (k < hdr->n_masks){
    fread_int16(&(loc->x),1,hdr->infile);
    fread_int16(&(loc->y),1,hdr->infile);
    xs[k] = loc->x;
    ys[k] = loc->y;
    k++;
  }

  /* Outlier section, which follows directly in the file */
  *noutliers = hdr->n_outliers;
  xs = Calloc(hdr->n_outliers,short);
  *outliers_x = xs;
  ys = Calloc(hdr->n_outliers,short);
  *outliers_y = ys;

  k = 0;
  while (k < hdr->n_outliers){
    fread_int16(&(loc->x),1,hdr->infile);
    fread_int16(&(loc->y),1,hdr->infile);
    xs[k] = loc->x;
    ys[k] = loc->y;
    k++;
  }

  fclose(hdr->infile);
  delete_binary_header(hdr);
  Free(loc);
}

/****************************************************************
 ****************************************************************
 **
 ** The following code is for supporting gzipped binary
 ** format CEL files.
**
 ****************************************************************
 ***************************************************************/

/*************************************************************
 **
 ** int isgzBinaryCelFile(const char *filename)
 **
 ** filename - Name of the prospective gzipped binary cel file
 **
 ** Returns 1 when the file starts with the magic number 64
 ** followed by the version number 4, and 0 otherwise (including
 ** when either integer cannot be read). An unopenable file is
 ** reported through error().
 **
 *************************************************************/

static int isgzBinaryCelFile(const char *filename){

  gzFile gz = gzopen(filename, "rb");
  int magic;
  int version;
  int looks_binary = 0;

  if (gz == NULL){
    error("Unable to open the file %s",filename);
    return 0;
  }

  /* Both leading integers are read before either one is judged. */
  if (gzread_int32(&magic,1,gz) && gzread_int32(&version,1,gz)){
    looks_binary = (magic == 64) && (version == 4);
  }

  gzclose(gz);
  return looks_binary;
}

/*************************************************************
 **
 ** static binary_header *gzread_binary_header(const char *filename, int return_stream)
 **
 ** const char *filename - name of binary cel file
 ** int return_stream - if 1 return the stream as part of the header, otherwise close the
 **                     file at end of function.
** *************************************************************/ static binary_header *gzread_binary_header(const char *filename, int return_stream){ /* , FILE *infile){ */ gzFile infile; binary_header *this_header = Calloc(1,binary_header); /* Pass through all the header information */ if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } if (!gzread_int32(&(this_header->magic_number),1,infile)){ error("The binary file %s does not have the appropriate magic number\n",filename); return 0; } if (this_header->magic_number != 64){ error("The binary file %s does not have the appropriate magic number\n",filename); return 0; } if (!gzread_int32(&(this_header->version_number),1,infile)){ return 0; } if (this_header->version_number != 4){ error("The binary file %s is not version 4. Cannot read\n",filename); return 0; } /*** NOTE THE DOCUMENTATION ON THE WEB IS INCONSISTENT WITH THE TRUTH IF YOU LOOK AT THE FUSION SDK */ /** DOCS - cols then rows , FUSION - rows then cols */ /** We follow FUSION here (in the past we followed the DOCS **/ if (!gzread_int32(&(this_header->rows),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!gzread_int32(&(this_header->cols),1,infile)){ error("Binary file corrupted? Could not read any further\n"); return 0; } if (!gzread_int32(&(this_header->n_cells),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (this_header->n_cells != (this_header->cols)*(this_header->rows)){ error("The number of cells does not seem to be equal to cols*rows in %s.\n",filename); } if (!gzread_int32(&(this_header->header_len),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } this_header->header = Calloc(this_header->header_len+1,char); if (!gzread(infile,this_header->header,sizeof(char)*this_header->header_len)){ error("binary file corrupted? 
Could not read any further.\n"); } if (!gzread_int32(&(this_header->alg_len),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } this_header->algorithm = Calloc(this_header->alg_len+1,char); if (!gzread_char(this_header->algorithm,this_header->alg_len,infile)){ error("binary file corrupted? Could not read any further.\n"); } if (!gzread_int32(&(this_header->alg_param_len),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } this_header->alg_param = Calloc(this_header->alg_param_len+1,char); if (!gzread_char(this_header->alg_param,this_header->alg_param_len,infile)){ error("binary file corrupted? Could not read any further.\n"); } if (!gzread_int32(&(this_header->celmargin),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!gzread_uint32(&(this_header->n_outliers),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!gzread_uint32(&(this_header->n_masks),1,infile)){ error("Binary file corrupted? Could not read any further\n"); } if (!gzread_int32(&(this_header->n_subgrids),1,infile)){ error("Binary file corrupted? 
Could not read any further\n"); } if (!return_stream){ gzclose(infile); } else { this_header->gzinfile = infile; } return this_header; } /************************************************************* ** ** static char *binary_get_header_info(const char *filename, int *dim1, int *dim2) ** ** this function pulls out the rows, cols and cdfname ** from the header of a binary cel file ** *************************************************************/ static char *gzbinary_get_header_info(const char *filename, int *dim1, int *dim2){ char *cdfName =0; tokenset *my_tokenset; int i = 0,endpos; binary_header *my_header; my_header = gzread_binary_header(filename,0); *dim1 = my_header->cols; *dim2 = my_header->rows; my_tokenset = tokenize(my_header->header," "); for (i =0; i < tokenset_size(my_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(my_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ cdfName= Calloc(endpos+1,char); strncpy(cdfName,get_token(my_tokenset,i),endpos); cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(my_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } delete_binary_header(my_header); delete_tokens(my_tokenset); return(cdfName); } /************************************************************************* ** ** void gzbinary_get_detailed_header_info(const char *filename, detailed_header_info *header_info) ** ** const char *filename - file to open ** detailed_header_info *header_info - place to store header information ** ** reads the header information from a gzipped binary cdf file (ignoring some fields ** that are unused). 
** ************************************************************************/ static void gzbinary_get_detailed_header_info(const char *filename, detailed_header_info *header_info){ /* char *cdfName =0; */ tokenset *my_tokenset; tokenset *temp_tokenset; char *header_copy; char *tmpbuffer; int i = 0,endpos; binary_header *my_header; my_header = gzread_binary_header(filename,0); header_info->cols = my_header->cols; header_info->rows = my_header->rows; header_info->Algorithm = Calloc(strlen(my_header->algorithm)+1,char); strcpy(header_info->Algorithm,my_header->algorithm); header_info->AlgorithmParameters = Calloc(strlen(my_header->alg_param)+1,char); strncpy(header_info->AlgorithmParameters,my_header->alg_param,strlen(my_header->alg_param)-1); /* Rprintf("%s\n\n\n",my_header->header); */ header_copy = Calloc(strlen(my_header->header) +1,char); strcpy(header_copy,my_header->header); my_tokenset = tokenize(header_copy,"\n"); /** Looking for GridCornerUL, GridCornerUR, GridCornerLR, GridCornerLL and DatHeader */ for (i =0; i < tokenset_size(my_tokenset);i++){ /* Rprintf("%d: %s\n",i,get_token(my_tokenset,i)); */ if (strncmp("GridCornerUL",get_token(my_tokenset,i),12) == 0){ tmpbuffer = Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerULx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerULy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("GridCornerUR",get_token(my_tokenset,i),12) == 0){ tmpbuffer = Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerURx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerURy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("GridCornerLR",get_token(my_tokenset,i),12) == 0){ tmpbuffer = 
Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerLRx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerLRy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("GridCornerLL",get_token(my_tokenset,i),12) == 0){ tmpbuffer = Calloc(strlen(get_token(my_tokenset,i))+1,char); strcpy(tmpbuffer,get_token(my_tokenset,i)); temp_tokenset = tokenize(tmpbuffer,"= "); header_info->GridCornerLLx = atoi(get_token(temp_tokenset,1)); header_info->GridCornerLLy = atoi(get_token(temp_tokenset,2)); delete_tokens(temp_tokenset); Free(tmpbuffer); } if (strncmp("DatHeader",get_token(my_tokenset,i),9) == 0){ header_info->DatHeader = Calloc(strlen(get_token(my_tokenset,i))+1, char); strcpy(header_info->DatHeader,(get_token(my_tokenset,i)+10)); } } delete_tokens(my_tokenset); Free(header_copy); header_copy = Calloc(my_header->header_len +1,char); strcpy(header_copy,my_header->header); my_tokenset = tokenize(header_copy," "); for (i =0; i < tokenset_size(my_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(my_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ header_info->cdfName= Calloc(endpos+1,char); strncpy(header_info->cdfName,get_token(my_tokenset,i),endpos); header_info->cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(my_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } header_info->ScanDate = Calloc(2, char); delete_tokens(my_tokenset); delete_binary_header(my_header); Free(header_copy); } /*************************************************************** ** ** static int check_binary_cel_file(const char *filename, char *ref_cdfName, int ref_dim_1, int ref_dim_2) ** ** This function checks a binary cel file to see if it has the ** expected rows, cols and cdfname ** 
**************************************************************/ static int check_gzbinary_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2){ char *cdfName =0; tokenset *my_tokenset; int i = 0,endpos; binary_header *my_header; my_header = gzread_binary_header(filename,0); if ((my_header->cols != ref_dim_1) || (my_header->rows != ref_dim_2)){ error("Cel file %s does not seem to have the correct dimensions",filename); } my_tokenset = tokenize(my_header->header," "); for (i =0; i < tokenset_size(my_tokenset);i++){ /* look for a token ending in ".1sq" */ endpos=token_ends_with(get_token(my_tokenset,i),".1sq"); if(endpos > 0){ /* Found the likely CDF name, now chop of .1sq and store it */ cdfName= Calloc(endpos+1,char); strncpy(cdfName,get_token(my_tokenset,i),endpos); cdfName[endpos] = '\0'; break; } if (i == (tokenset_size(my_tokenset) - 1)){ error("Cel file %s does not seem to be have cdf information",filename); } } if (strncasecmp(cdfName,ref_cdfName,strlen(ref_cdfName)) != 0){ error("Cel file %s does not seem to be of %s type",filename,ref_cdfName); } delete_binary_header(my_header); delete_tokens(my_tokenset); Free(cdfName); return 0; } /*************************************************************** ** ** static int gzread_binarycel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads gzipped binary cel file intensities into the data matrix ** **************************************************************/ static int gzread_binarycel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0, j=0; size_t cur_index; int fread_err=0; celintens_record *cur_intensity = Calloc(1,celintens_record); binary_header *my_header; my_header = gzread_binary_header(filename,1); for (i = 0; i < my_header->rows; i++){ for (j =0; j < my_header->cols; j++){ cur_index = j + 
my_header->cols*i; /* i + my_header->rows*j; */ fread_err = gzread_float32(&(cur_intensity->cur_intens),1,my_header->gzinfile); fread_err+= gzread_float32(&(cur_intensity->cur_sd),1,my_header->gzinfile); fread_err+= gzread_int16(&(cur_intensity->npixels),1,my_header->gzinfile); if (fread_err < 3){ gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_intensity); return 1; } if (cur_intensity->cur_intens < 0 || cur_intensity->cur_intens > 65536 || isnan(cur_intensity->cur_intens)){ gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_intensity); return 1; } fread_err=0; intensity[chip_num*my_header->n_cells + cur_index] = (double )cur_intensity->cur_intens; } } gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_intensity); return(0); } /*************************************************************** ** ** static int gzread_binarycel_file_stdev(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads binary cel file stddev values into the data matrix ** **************************************************************/ static int gzread_binarycel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0, j=0; size_t cur_index; int fread_err=0; celintens_record *cur_intensity = Calloc(1,celintens_record); binary_header *my_header; my_header = gzread_binary_header(filename,1); for (i = 0; i < my_header->rows; i++){ for (j =0; j < my_header->cols; j++){ cur_index = j + my_header->cols*i; /* i + my_header->rows*j; */ fread_err = gzread_float32(&(cur_intensity->cur_intens),1,my_header->gzinfile); fread_err+= gzread_float32(&(cur_intensity->cur_sd),1,my_header->gzinfile); fread_err+= gzread_int16(&(cur_intensity->npixels),1,my_header->gzinfile); if (fread_err < 3){ gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_intensity); return 1; } fread_err=0; 
intensity[chip_num*my_header->n_cells + cur_index] = (double )cur_intensity->cur_sd; } } gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_intensity); return(0); } /*************************************************************** ** ** static int read_binarycel_file_npixels(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads binary cel file npixels values into the data matrix ** **************************************************************/ static int gzread_binarycel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0, j=0; size_t cur_index; int fread_err=0; celintens_record *cur_intensity = Calloc(1,celintens_record); binary_header *my_header; my_header = gzread_binary_header(filename,1); for (i = 0; i < my_header->rows; i++){ for (j =0; j < my_header->cols; j++){ cur_index = j + my_header->cols*i; /* i + my_header->rows*j; */ fread_err = gzread_float32(&(cur_intensity->cur_intens),1,my_header->gzinfile); fread_err+= gzread_float32(&(cur_intensity->cur_sd),1,my_header->gzinfile); fread_err+= gzread_int16(&(cur_intensity->npixels),1,my_header->gzinfile); if (fread_err < 3){ gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_intensity); return 1; } fread_err=0; intensity[chip_num*my_header->n_cells + cur_index] = (double )cur_intensity->npixels; } } gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_intensity); return(0); } /*************************************************************** ** ** static void gz_binary_apply_masks(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers) ** ** ** **************************************************************/ static void gz_binary_apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, 
int rm_mask, int rm_outliers){ size_t i=0; size_t cur_index; int sizeofrecords; outliermask_loc *cur_loc= Calloc(1,outliermask_loc); binary_header *my_header; my_header = gzread_binary_header(filename,1); sizeofrecords = 2*sizeof(float) + sizeof(short); /* sizeof(celintens_record) */ //fseek(my_header->infile,my_header->n_cells*sizeofrecords,SEEK_CUR); gzseek(my_header->gzinfile,my_header->n_cells*sizeofrecords,SEEK_CUR); if (rm_mask){ for (i =0; i < my_header->n_masks; i++){ gzread_int16(&(cur_loc->x),1,my_header->gzinfile); gzread_int16(&(cur_loc->y),1,my_header->gzinfile); cur_index = (int)cur_loc->x + my_header->rows*(int)cur_loc->y; /* cur_index = (int)cur_loc->y + my_header->rows*(int)cur_loc->x; */ /* intensity[chip_num*my_header->rows + cur_index] = R_NaN; */ intensity[chip_num*rows + cur_index] = R_NaN; } } else { gzseek(my_header->gzinfile,my_header->n_masks*sizeof(cur_loc),SEEK_CUR); } if (rm_outliers){ for (i =0; i < my_header->n_outliers; i++){ gzread_int16(&(cur_loc->x),1,my_header->gzinfile); gzread_int16(&(cur_loc->y),1,my_header->gzinfile); cur_index = (int)cur_loc->x + my_header->rows*(int)cur_loc->y; /* intensity[chip_num*my_header->n_cells + cur_index] = R_NaN; */ intensity[chip_num*rows + cur_index] = R_NaN; } } else { gzseek(my_header->gzinfile,my_header->n_outliers*sizeof(cur_loc),SEEK_CUR); } gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_loc); } /**************************************************************** ** ** static void gzbinary_get_masks_outliers(const char *filename, ** int *nmasks, short **masks_x, short **masks_y, ** int *noutliers, short **outliers_x, short **outliers_y ** ** This gets the x and y coordinates stored in the masks and outliers sections ** of the cel files. 
(for binary CEL files) ** ****************************************************************/ static void gzbinary_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y){ int i=0; int sizeofrecords; outliermask_loc *cur_loc= Calloc(1,outliermask_loc); binary_header *my_header; my_header = gzread_binary_header(filename,1); sizeofrecords = 2*sizeof(float) + sizeof(short); gzseek(my_header->gzinfile,my_header->n_cells*sizeofrecords,SEEK_CUR); *nmasks = my_header->n_masks; *masks_x = Calloc(my_header->n_masks,short); *masks_y = Calloc(my_header->n_masks,short); for (i =0; i < my_header->n_masks; i++){ gzread_int16(&(cur_loc->x),1,my_header->gzinfile); gzread_int16(&(cur_loc->y),1,my_header->gzinfile); (*masks_x)[i] = (cur_loc->x); (*masks_y)[i] = (cur_loc->y); } *noutliers = my_header->n_outliers; *outliers_x = Calloc(my_header->n_outliers,short); *outliers_y = Calloc(my_header->n_outliers,short); for (i =0; i < my_header->n_outliers; i++){ gzread_int16(&(cur_loc->x),1,my_header->gzinfile); gzread_int16(&(cur_loc->y),1,my_header->gzinfile); (*outliers_x)[i] = (cur_loc->x); (*outliers_y)[i] = (cur_loc->y); } gzclose(my_header->gzinfile); delete_binary_header(my_header); Free(cur_loc); } /**************************************************************** **************************************************************** ** ** This is the code that interfaces with R ** *************************************************************** ***************************************************************/ /************************************************************************ ** ** SEXP read_abatch(SEXP filenames, SEXP compress, ** SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, ** SEXP ref_cdfName) ** ** SEXP filenames - an R character vector of filenames to read ** SEXP compress - logical flag TRUE means files are *.gz ** SEXP rm_mask - if true set MASKS to NA ** SEXP rm_outliers - if true set 
OUTLIERS to NA ** SEXP rm_extra - if true overrides rm_mask and rm_outliers settings ** SEXP ref_cdfName - the reference CDF name to check each CEL file against ** SEXP ref_dim - cols/rows of reference chip ** SEXP verbose - if verbose print out more information to the screen ** ** RETURNS an intensity matrix with cel file intensities from ** each chip in columns ** ** this function will read in all the cel files in a affybatch. ** this function will stop on possible errors with an error() call. ** ** The intensity matrix will be allocated here. It will be given ** column names here. the column names that it will be given here are the ** filenames. ** *************************************************************************/ SEXP read_abatch(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, SEXP ref_cdfName, SEXP ref_dim, SEXP verbose){ int i; int n_files; int ref_dim_1, ref_dim_2; const char *cur_file_name; const char *cdfName; double *intensityMatrix; SEXP intensity,names,dimnames; if (!isString(filenames)) error("read_abatch: filenames argument must be a character vector"); ref_dim_1 = INTEGER(ref_dim)[0]; ref_dim_2 = INTEGER(ref_dim)[1]; n_files = GET_LENGTH(filenames); PROTECT(intensity = allocMatrix(REALSXP, ref_dim_1*ref_dim_2, n_files)); cdfName = CHAR(STRING_ELT(ref_cdfName,0)); intensityMatrix = NUMERIC_POINTER(AS_NUMERIC(intensity)); /* before we do any real reading check that all the files are of the same cdf type */ for (i =0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (isTextCelFile(cur_file_name)){ if (check_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if (check_gzcel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); 
} #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (check_binary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzBinaryCelFile(cur_file_name)){ if (check_gzbinary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if(isGenericCelFile(cur_file_name)){ if (check_generic_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if(isgzGenericCelFile(cur_file_name)){ if (check_gzgeneric_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats..\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } /* Now read in each of the cel files, one by one, filling out the columns of the intensity matrix. 
*/ for (i=0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (asInteger(verbose)){ Rprintf("Reading in : %s\n",cur_file_name); } if (isTextCelFile(cur_file_name)){ read_cel_file_intensities(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1); } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB read_gzcel_file_intensities(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (read_binarycel_file_intensities(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else if (isgzBinaryCelFile(cur_file_name)){ if (gzread_binarycel_file_intensities(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else if (isGenericCelFile(cur_file_name)){ if (read_genericcel_file_intensities(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else if (isgzGenericCelFile(cur_file_name)){ if (gzread_genericcel_file_intensities(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. 
The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } /* Now lets go through all the files filling in masks etc */ if (asInteger(rm_mask) || asInteger(rm_outliers) || asInteger(rm_extra)){ for (i=0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames,i)); if (isTextCelFile(cur_file_name)){ if (asInteger(rm_extra)){ apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if (asInteger(rm_extra)){ gz_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { gz_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (asInteger(rm_extra)){ binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzBinaryCelFile(cur_file_name)){ if (asInteger(rm_extra)){ gz_binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { gz_binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isGenericCelFile(cur_file_name)){ if (asInteger(rm_extra)){ generic_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { generic_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if 
(isgzGenericCelFile(cur_file_name)){ if (asInteger(rm_extra)){ gzgeneric_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { gzgeneric_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary and gzipped binary.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } } PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(names = allocVector(STRSXP,n_files)); for ( i =0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); SET_STRING_ELT(names,i,mkChar(cur_file_name)); } SET_VECTOR_ELT(dimnames,1,names); setAttrib(intensity, R_DimNamesSymbol, dimnames); UNPROTECT(3); return intensity; } /************************************************************************* ** ** SEXP ReadHeader(SEXP filename) ** ** SEXP filename - name of the file to Read. ** ** RETURNS a List containing CDFName, Rows and Cols dimensions. 
** ** This function reads the HEADER of the CEL file, determines the ** CDF name and ROW,COL dimensions ** *************************************************************************/ SEXP ReadHeader(SEXP filename){ int ref_dim_1=0, ref_dim_2=0; const char *cur_file_name; const char *cdfName=0; SEXP headInfo; SEXP name; SEXP cel_dimensions; /* SEXP cel_dimensions_names; */ PROTECT(cel_dimensions= allocVector(INTSXP,2)); PROTECT(headInfo = allocVector(VECSXP,2)); cur_file_name = CHAR(STRING_ELT(filename, 0)); /* check for type text, gzipped text or binary then ReadHeader */ if (isTextCelFile(cur_file_name)){ cdfName = get_header_info(cur_file_name, &ref_dim_1,&ref_dim_2); } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB cdfName = gz_get_header_info(cur_file_name, &ref_dim_1,&ref_dim_2); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ cdfName = binary_get_header_info(cur_file_name, &ref_dim_1,&ref_dim_2); } else if (isgzBinaryCelFile(cur_file_name)){ cdfName = gzbinary_get_header_info(cur_file_name, &ref_dim_1,&ref_dim_2); } else if (isGenericCelFile(cur_file_name)){ cdfName = generic_get_header_info(cur_file_name, &ref_dim_1,&ref_dim_2); } else if (isgzGenericCelFile(cur_file_name)){ cdfName = gzgeneric_get_header_info(cur_file_name, &ref_dim_1,&ref_dim_2); } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. 
The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } PROTECT(name = allocVector(STRSXP,1)); SET_STRING_ELT(name,0,mkChar(cdfName)); INTEGER(cel_dimensions)[0] = ref_dim_1; /* This is cols */ INTEGER(cel_dimensions)[1] = ref_dim_2; /* this is rows */ SET_VECTOR_ELT(headInfo,0,name); SET_VECTOR_ELT(headInfo,1,cel_dimensions); Free(cdfName); UNPROTECT(3); return headInfo; } /************************************************************************* ** ** SEXP ReadHeaderDetailed(SEXP filename) ** ** SEXP filename - name of the file to Read. ** ** RETURNS a List containing CDFName, Rows and Cols dimensions plus more detailed header information ** ** ** This function reads the HEADER of the CEL file ** *************************************************************************/ SEXP ReadHeaderDetailed(SEXP filename){ SEXP HEADER; SEXP tmp_sexp; const char *cur_file_name; detailed_header_info header_info; PROTECT(HEADER = allocVector(VECSXP,10)); /* return as a list */ cur_file_name = CHAR(STRING_ELT(filename,0)); if (isTextCelFile(cur_file_name)){ get_detailed_header_info(cur_file_name,&header_info); } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB gz_get_detailed_header_info(cur_file_name,&header_info); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ binary_get_detailed_header_info(cur_file_name,&header_info); } else if (isgzBinaryCelFile(cur_file_name)){ gzbinary_get_detailed_header_info(cur_file_name,&header_info); } else if (isGenericCelFile(cur_file_name)){ generic_get_detailed_header_info(cur_file_name,&header_info); } else if (isgzGenericCelFile(cur_file_name)){ gzgeneric_get_detailed_header_info(cur_file_name,&header_info); } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? 
tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } /* Rprintf("%s\n",header_info.cdfName); */ /* Copy everything across into the R data structure */ PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(header_info.cdfName)); SET_VECTOR_ELT(HEADER,0,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = header_info.cols; /* This is cols */ INTEGER(tmp_sexp)[1] = header_info.rows; /* this is rows */ SET_VECTOR_ELT(HEADER,1,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = header_info.GridCornerULx; INTEGER(tmp_sexp)[1] = header_info.GridCornerULy; SET_VECTOR_ELT(HEADER,2,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = header_info.GridCornerURx; INTEGER(tmp_sexp)[1] = header_info.GridCornerURy; SET_VECTOR_ELT(HEADER,3,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = header_info.GridCornerLRx; INTEGER(tmp_sexp)[1] = header_info.GridCornerLRy; SET_VECTOR_ELT(HEADER,4,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = header_info.GridCornerLLx; INTEGER(tmp_sexp)[1] = header_info.GridCornerLLy; SET_VECTOR_ELT(HEADER,5,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(header_info.DatHeader)); SET_VECTOR_ELT(HEADER,6,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(header_info.Algorithm)); SET_VECTOR_ELT(HEADER,7,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(header_info.AlgorithmParameters)); SET_VECTOR_ELT(HEADER,8,tmp_sexp); UNPROTECT(1); 
PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(header_info.ScanDate)); SET_VECTOR_ELT(HEADER,9,tmp_sexp); UNPROTECT(1); Free(header_info.Algorithm); Free(header_info.AlgorithmParameters); Free(header_info.DatHeader); Free(header_info.cdfName); UNPROTECT(1); return HEADER; } /* Refactored from read_probeintensities so both threaded and non-threaded versions can use the same code */ void readfile(SEXP filenames, double *CurintensityMatrix, double *pmMatrix, double *mmMatrix, int i, int ref_dim_1, int ref_dim_2, int n_files, int num_probes, SEXP cdfInfo, int which_flag, SEXP verbose){ const char *cur_file_name; #ifdef USE_PTHREADS pthread_mutex_lock (&mutex_R); cur_file_name = CHAR(STRING_ELT(filenames,i)); pthread_mutex_unlock (&mutex_R); #else cur_file_name = CHAR(STRING_ELT(filenames,i)); #endif if (asInteger(verbose)){ Rprintf("Reading in : %s\n",cur_file_name); } if (isTextCelFile(cur_file_name)){ if(read_cel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1) !=0){ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name); } storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag); } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if(read_gzcel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1)!=0){ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name); } storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if(read_binarycel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1) !=0){ error("The CEL file %s was corrupted. 
Data not read.\n",cur_file_name); } storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag); } else if (isgzBinaryCelFile(cur_file_name)){ if(gzread_binarycel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1) !=0){ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name); } storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag); } else if (isGenericCelFile(cur_file_name)){ if(read_genericcel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1) !=0){ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name); } storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag); } else if (isgzGenericCelFile(cur_file_name)){ if(gzread_genericcel_file_intensities(cur_file_name,CurintensityMatrix, 0, ref_dim_1*ref_dim_2, n_files,ref_dim_1)!=0){ error("The CEL file %s was corrupted. Data not read.\n",cur_file_name); } storeIntensities(CurintensityMatrix,pmMatrix,mmMatrix,i,ref_dim_1*ref_dim_2, n_files,num_probes,cdfInfo,which_flag); } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. 
The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } void checkFileCDF(SEXP filenames, int i, const char *cdfName, int ref_dim_1, int ref_dim_2){ #ifdef USE_PTHREADS pthread_mutex_lock (&mutex_R); const char *cur_file_name = CHAR(STRING_ELT(filenames,i)); pthread_mutex_unlock (&mutex_R); #else const char *cur_file_name = CHAR(STRING_ELT(filenames,i)); #endif if (isTextCelFile(cur_file_name)){ if (check_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if (check_gzcel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (check_binary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzBinaryCelFile(cur_file_name)){ if (check_gzbinary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isGenericCelFile(cur_file_name)){ if (check_generic_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzGenericCelFile(cur_file_name)){ if (check_gzgeneric_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? 
tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } #ifdef USE_PTHREADS /* void * definitions are mandated by pthreads */ void *readfile_group(void *data){ int num; struct thread_data *args = (struct thread_data *) data; args->CurintensityMatrix = Calloc(args->ref_dim_1*args->ref_dim_2, double); for(num = args->i; num < args->i+args->chunk_size; num++){ readfile(args->filenames, args->CurintensityMatrix, args->pmMatrix, args->mmMatrix, num, args->ref_dim_1, args->ref_dim_2, args->n_files, args->num_probes, args->cdfInfo, args->which_flag, args->verbose); } Free(args->CurintensityMatrix); return NULL; } void *checkFileCDF_group(void *data){ int num; struct thread_data *args = (struct thread_data *) data; for(num = args->i; num < args->i+args->chunk_size; num++){ checkFileCDF(args->filenames, num, args->refCdfName, args->ref_dim_1, args->ref_dim_2); } return NULL; } #endif /************************************************************************* ** ** SEXP read_probeintensities(SEXP filenames, SEXP compress, SEXP rm_mask, ** SEXP rm_outliers, SEXP rm_extra, SEXP ref_cdfName, ** SEXP ref_dim, SEXP verbose, SEXP cdfInfo) ** ** ** SEXP filenames - an R character vector of filenames to read ** SEXP compress - logical flag TRUE means files are *.gz ** SEXP rm_mask - if true set MASKS to NA ** SEXP rm_outliers - if true set OUTLIERS to NA ** SEXP rm_extra - if true overrides rm_mask and rm_outliers settings ** SEXP ref_cdfName - the reference CDF name to check each CEL file against ** SEXP ref_dim - cols/rows of reference chip ** SEXP verbose - if verbose print out more information to the screen ** SEXP cdfInfo - locations of probes and probesets ** SEXP which - Indicate whether PM, MM or both are required ** ** returns 
an R list either one or two elements long. each element is a matrix ** either PM or MM elements ** ** ** This function reads probe intensites into PM and MM matrices. No ** affybatch style matrix is created, Instead the order of the probes is ** dependent on the the information given in cdfInfo. cdfInfo is a list ** of matrices, each matricies has two columns. The first is assumed to ** be PM indices, the second column is assumed to be MM indices. ** ** *************************************************************************/ SEXP read_probeintensities(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, SEXP ref_cdfName, SEXP ref_dim, SEXP verbose, SEXP cdfInfo,SEXP which){ int i; int n_files; int ref_dim_1, ref_dim_2; int which_flag; /* 0 means both, 1 means PM only, -1 means MM only */ int num_probes; const char *cur_file_name; const char *cdfName; double *pmMatrix=0, *mmMatrix=0; #ifndef USE_PTHREADS double *CurintensityMatrix; #endif SEXP PM_intensity= R_NilValue, MM_intensity= R_NilValue, Current_intensity, names, dimnames; SEXP output_list,pmmmnames; #ifdef USE_PTHREADS SEXP curIndices; pthread_t *threads; char *nthreads; int returnCode, t, chunk_size, num_threads = 1; double chunk_size_d, chunk_tot_d; pthread_attr_t attr; struct thread_data *args; void *status; size_t stacksize = PTHREAD_STACK_MIN + 0x40000; #endif if (strcmp(CHAR(STRING_ELT(which,0)),"pm") == 0){ which_flag= 1; } else if (strcmp(CHAR(STRING_ELT(which,0)),"mm") == 0){ which_flag = -1; } else { which_flag = 0; } ref_dim_1 = INTEGER(ref_dim)[0]; ref_dim_2 = INTEGER(ref_dim)[1]; if (!isString(filenames)) error("read_probeintensities: argument 'filenames' must be a character vector"); n_files = GET_LENGTH(filenames); /* We will read in chip at a time */ PROTECT(Current_intensity = allocMatrix(REALSXP, ref_dim_1*ref_dim_2, 1)); cdfName = CHAR(STRING_ELT(ref_cdfName,0)); #ifndef USE_PTHREADS CurintensityMatrix = NUMERIC_POINTER(AS_NUMERIC(Current_intensity)); #endif /* Lets count how 
many probes we have */ num_probes = CountCDFProbes(cdfInfo); if (which_flag >= 0){ PROTECT(PM_intensity = allocMatrix(REALSXP,num_probes,n_files)); pmMatrix = NUMERIC_POINTER(AS_NUMERIC(PM_intensity)); } if (which_flag <= 0){ PROTECT(MM_intensity = allocMatrix(REALSXP,num_probes,n_files)); mmMatrix = NUMERIC_POINTER(AS_NUMERIC(MM_intensity)); } if (which_flag < 0){ pmMatrix = NULL; } if (which_flag > 0){ mmMatrix = NULL; } /* Setup the data required for threading */ #ifdef USE_PTHREADS nthreads = getenv(THREADS_ENV_VAR); if(nthreads != NULL){ num_threads = atoi(nthreads); if(num_threads <= 0){ error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads); } } threads = (pthread_t *) Calloc(num_threads, pthread_t); /* Initialize and set thread detached attribute */ pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); pthread_attr_setstacksize (&attr, stacksize); /* this code works out how many threads to use and allocates ranges of files to each thread */ /* The aim is to try to be as fair as possible in dividing up the matrix */ /* A special cases to be aware of: 1) Number of files is less than the number of threads */ if (num_threads < n_files){ chunk_size = n_files/num_threads; chunk_size_d = ((double) n_files)/((double) num_threads); } else { chunk_size = 1; chunk_size_d = 1; } if(chunk_size == 0){ chunk_size = 1; } n_probesets = GET_LENGTH(cdfInfo); n_probes = (int *) Calloc(n_probesets, int); cur_indexes = (double **) Calloc(n_probesets, double *); /* Create the data structures required for each thread to independently run the checkFileCDF and readfile functions */ for(i=0; i < n_probesets; i++){ curIndices = VECTOR_ELT(cdfInfo,i); n_probes[i] = INTEGER(getAttrib(curIndices,R_DimSymbol))[0]; cur_indexes[i] = (double *) Calloc(n_probes[i]*2, double); memcpy(cur_indexes[i], NUMERIC_POINTER(AS_NUMERIC(curIndices)), sizeof(double)*n_probes[i]*2); } 
args = (struct thread_data *) Calloc((n_files < num_threads ? n_files : num_threads), struct thread_data); args[0].filenames = filenames; args[0].pmMatrix = pmMatrix; args[0].mmMatrix = mmMatrix; args[0].ref_dim_1 = ref_dim_1; args[0].ref_dim_2 = ref_dim_2, args[0].n_files = n_files; args[0].num_probes = num_probes; args[0].cdfInfo = cdfInfo; args[0].refCdfName = cdfName; args[0].which_flag = which_flag; args[0].verbose = verbose; pthread_mutex_init(&mutex_R, NULL); t = 0; /* t = number of actual threads doing work */ chunk_tot_d = 0; for (i=0; floor(chunk_tot_d+0.00001) < n_files; i+=chunk_size){ if(t != 0){ memcpy(&(args[t]), &(args[0]), sizeof(struct thread_data)); } args[t].i = i; /* take care of distribution of the remainder (when #chips%#threads != 0) */ chunk_tot_d += chunk_size_d; // Add 0.00001 in case there was a rounding issue with the division if(i+chunk_size < floor(chunk_tot_d+0.00001)){ args[t].chunk_size = chunk_size+1; i++; } else{ args[t].chunk_size = chunk_size; } t++; } /* First check headers of cel files */ /* before we do any real reading check that all the files are of the same cdf type */ for (i =0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, checkFileCDF_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } #else /* First check headers of cel files */ /* before we do any real reading check that all the files are of the same cdf type */ for (i =0; i < n_files; i++){ checkFileCDF(filenames, i, cdfName, ref_dim_1, ref_dim_2); } #endif /* now lets read them in and store them in the PM and MM matrices */ #ifdef USE_PTHREADS for(int i = 0; i < t; i++){ returnCode = pthread_create(&threads[i], &attr, 
readfile_group, (void *) &(args[i])); if (returnCode){ error("ERROR; return code from pthread_create() is %d\n", returnCode); } } /* Free attribute and wait for the other threads */ for(i = 0; i < t; i++){ returnCode = pthread_join(threads[i], &status); if (returnCode){ error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", i, returnCode, *((int *) status)); } } Free(args); Free(threads); pthread_attr_destroy(&attr); pthread_mutex_destroy(&mutex_R); /* clear the old index data */ Free(n_probes); for(i = 0; i < n_probesets; i++){ Free(cur_indexes[i]); } Free(cur_indexes); #else for (i=0; i < n_files; i++){ readfile(filenames, CurintensityMatrix, pmMatrix, mmMatrix, i, ref_dim_1, ref_dim_2, n_files, num_probes, cdfInfo, which_flag, verbose); } #endif PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(names = allocVector(STRSXP,n_files)); for ( i =0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); SET_STRING_ELT(names,i,mkChar(cur_file_name)); } SET_VECTOR_ELT(dimnames,1,names); if (which_flag >=0){ setAttrib(PM_intensity, R_DimNamesSymbol, dimnames); } if (which_flag <=0){ setAttrib(MM_intensity, R_DimNamesSymbol, dimnames); } if (which_flag == 0){ PROTECT(output_list = allocVector(VECSXP,2)); SET_VECTOR_ELT(output_list,0,PM_intensity); SET_VECTOR_ELT(output_list,1,MM_intensity); PROTECT(pmmmnames = allocVector(STRSXP,2)); SET_STRING_ELT(pmmmnames,0,mkChar("pm")); SET_STRING_ELT(pmmmnames,1,mkChar("mm")); } else if (which_flag > 0){ PROTECT(output_list = allocVector(VECSXP,1)); SET_VECTOR_ELT(output_list,0,PM_intensity); PROTECT(pmmmnames = allocVector(STRSXP,1)); SET_STRING_ELT(pmmmnames,0,mkChar("pm")); } else { PROTECT(output_list = allocVector(VECSXP,1)); SET_VECTOR_ELT(output_list,0,MM_intensity); PROTECT(pmmmnames = allocVector(STRSXP,1)); SET_STRING_ELT(pmmmnames,0,mkChar("mm")); } setAttrib(output_list,R_NamesSymbol,pmmmnames); if (which_flag != 0){ UNPROTECT(6); } else { UNPROTECT(7); } 
return(output_list); } /************************************************************************ ** ** SEXP read_abatch_stddev(SEXP filenames, SEXP compress, ** SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, ** SEXP ref_cdfName) ** ** SEXP filenames - an R character vector of filenames to read ** ** SEXP rm_mask - if true set MASKS to NA ** SEXP rm_outliers - if true set OUTLIERS to NA ** SEXP rm_extra - if true overrides rm_mask and rm_outliers settings ** SEXP ref_cdfName - the reference CDF name to check each CEL file against ** SEXP ref_dim - cols/rows of reference chip ** SEXP verbose - if verbose print out more information to the screen ** ** RETURNS an intensity matrix with cel file stddev from ** each chip in columns ** ** this function will read in all the cel files in a affybatch. ** this function will stop on possible errors with an error() call. ** ** The intensity matrix will be allocated here. It will be given ** column names here. the column names that it will be given here are the ** filenames. 
** *************************************************************************/ SEXP read_abatch_stddev(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, SEXP ref_cdfName, SEXP ref_dim, SEXP verbose){ int i; int n_files; int ref_dim_1, ref_dim_2; const char *cur_file_name; const char *cdfName; double *intensityMatrix; SEXP intensity,names,dimnames; ref_dim_1 = INTEGER(ref_dim)[0]; ref_dim_2 = INTEGER(ref_dim)[1]; if (!isString(filenames)) error("read_abatch_stddev: argument 'filenames' must be a character vector"); n_files = GET_LENGTH(filenames); PROTECT(intensity = allocMatrix(REALSXP, ref_dim_1*ref_dim_2, n_files)); cdfName = CHAR(STRING_ELT(ref_cdfName,0)); intensityMatrix = NUMERIC_POINTER(AS_NUMERIC(intensity)); /* before we do any real reading check that all the files are of the same cdf type */ for (i =0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (isTextCelFile(cur_file_name)){ if (check_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if (check_gzcel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (check_binary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzBinaryCelFile(cur_file_name)){ if (check_gzbinary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isGenericCelFile(cur_file_name)){ if (check_generic_cel_file(cur_file_name,cdfName, ref_dim_1, 
ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzGenericCelFile(cur_file_name)){ if (check_gzgeneric_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } /* Now read in each of the cel files, one by one, filling out the columns of the intensity matrix. */ for (i=0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (asInteger(verbose)){ Rprintf("Reading in : %s\n",cur_file_name); } if (isTextCelFile(cur_file_name)){ read_cel_file_stddev(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1); } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB read_gzcel_file_stddev(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (read_binarycel_file_stddev(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else if (isgzBinaryCelFile(cur_file_name)){ if (gzread_binarycel_file_stddev(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else if (isGenericCelFile(cur_file_name)){ if (read_genericcel_file_stddev(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is 
corrupted.\n",cur_file_name); } } else if (isgzGenericCelFile(cur_file_name)){ if (gzread_genericcel_file_stddev(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } }else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary and gzipped binary\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } /* Now lets go through all the files filling in masks etc */ if (asInteger(rm_mask) || asInteger(rm_outliers) || asInteger(rm_extra)){ for (i=0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (isTextCelFile(cur_file_name)){ if (asInteger(rm_extra)){ apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if (asInteger(rm_extra)){ gz_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { gz_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (asInteger(rm_extra)){ binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzBinaryCelFile(cur_file_name)){ if (asInteger(rm_extra)){ gz_binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { 
gz_binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isGenericCelFile(cur_file_name)){ if (asInteger(rm_extra)){ generic_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { generic_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzGenericCelFile(cur_file_name)){ if (asInteger(rm_extra)){ gzgeneric_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { gzgeneric_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } }else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. 
The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } } PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(names = allocVector(STRSXP,n_files)); for ( i =0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); SET_STRING_ELT(names,i,mkChar(cur_file_name)); } SET_VECTOR_ELT(dimnames,1,names); setAttrib(intensity, R_DimNamesSymbol, dimnames); UNPROTECT(3); return intensity; } /************************************************************************ ** ** SEXP read_abatch_npixels(SEXP filenames, SEXP compress, ** SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, ** SEXP ref_cdfName) ** ** SEXP filenames - an R character vector of filenames to read ** ** SEXP rm_mask - if true set MASKS to NA ** SEXP rm_outliers - if true set OUTLIERS to NA ** SEXP rm_extra - if true overrides rm_mask and rm_outliers settings ** SEXP ref_cdfName - the reference CDF name to check each CEL file against ** SEXP ref_dim - cols/rows of reference chip ** SEXP verbose - if verbose print out more information to the screen ** ** RETURNS an intensity matrix with cel file stddev from ** each chip in columns ** ** this function will read in all the cel files in a affybatch. ** this function will stop on possible errors with an error() call. ** ** The intensity matrix will be allocated here. It will be given ** column names here. the column names that it will be given here are the ** filenames. 
** *************************************************************************/ SEXP read_abatch_npixels(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, SEXP ref_cdfName, SEXP ref_dim, SEXP verbose){ int i; int n_files; int ref_dim_1, ref_dim_2; const char *cur_file_name; const char *cdfName; double *intensityMatrix; SEXP intensity,names,dimnames; ref_dim_1 = INTEGER(ref_dim)[0]; ref_dim_2 = INTEGER(ref_dim)[1]; if (!isString(filenames)) error("read_abatch_npixels: argument 'filenames' must be a character vector"); n_files = GET_LENGTH(filenames); PROTECT(intensity = allocMatrix(REALSXP, ref_dim_1*ref_dim_2, n_files)); cdfName = CHAR(STRING_ELT(ref_cdfName,0)); intensityMatrix = NUMERIC_POINTER(AS_NUMERIC(intensity)); /* before we do any real reading check that all the files are of the same cdf type */ for (i =0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (isTextCelFile(cur_file_name)){ if (check_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if (check_gzcel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (check_binary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzBinaryCelFile(cur_file_name)){ if (check_gzbinary_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isGenericCelFile(cur_file_name)){ if (check_generic_cel_file(cur_file_name,cdfName, ref_dim_1, 
ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else if (isgzGenericCelFile(cur_file_name)){ if (check_gzgeneric_cel_file(cur_file_name,cdfName, ref_dim_1, ref_dim_2)){ error("File %s does not seem to have correct dimension or is not of %s chip type.", cur_file_name, cdfName); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } /* Now read in each of the cel files, one by one, filling out the columns of the intensity matrix. */ for (i=0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (asInteger(verbose)){ Rprintf("Reading in : %s\n",cur_file_name); } if (isTextCelFile(cur_file_name)){ read_cel_file_npixels(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1); } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB read_gzcel_file_npixels(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (read_binarycel_file_npixels(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else if (isgzBinaryCelFile(cur_file_name)){ if (gzread_binarycel_file_npixels(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else if (isGenericCelFile(cur_file_name)){ if (read_genericcel_file_npixels(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is 
corrupted.\n",cur_file_name); } } else if (isgzGenericCelFile(cur_file_name)){ if (gzread_genericcel_file_npixels(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1)){ error("It appears that the file %s is corrupted.\n",cur_file_name); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary and gzipped binary\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } /* Now lets go through all the files filling in masks etc */ if (asInteger(rm_mask) || asInteger(rm_outliers) || asInteger(rm_extra)){ for (i=0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); if (isTextCelFile(cur_file_name)){ if (asInteger(rm_extra)){ apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzTextCelFile(cur_file_name)){ #if defined HAVE_ZLIB if (asInteger(rm_extra)){ gz_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { gz_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(cur_file_name)){ if (asInteger(rm_extra)){ binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzBinaryCelFile(cur_file_name)){ if (asInteger(rm_extra)){ gz_binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { 
gz_binary_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isGenericCelFile(cur_file_name)){ if (asInteger(rm_extra)){ generic_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { generic_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else if (isgzGenericCelFile(cur_file_name)){ if (asInteger(rm_extra)){ gzgeneric_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,1,1); } else { gzgeneric_apply_masks(cur_file_name,intensityMatrix, i, ref_dim_1*ref_dim_2, n_files,ref_dim_1,asInteger(rm_mask),asInteger(rm_outliers)); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",cur_file_name); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",cur_file_name); #endif } } } PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(names = allocVector(STRSXP,n_files)); for ( i =0; i < n_files; i++){ cur_file_name = CHAR(STRING_ELT(filenames, i)); SET_STRING_ELT(names,i,mkChar(cur_file_name)); } SET_VECTOR_ELT(dimnames,1,names); setAttrib(intensity, R_DimNamesSymbol, dimnames); UNPROTECT(3); return intensity; } /**************************************************************** **************************************************************** ** ** The following is for "C" code interfacing with CEL files ** in a manner other than above. 
** *************************************************************** ***************************************************************/ /************************************************************************ ** ** CEL *read_cel_file(const char *filename) ** ** Reads the contents of the CEL file into a "CEL" structure. ** Currently slightly inefficient (should be reimplemented more ** cleanly later) ** ** ************************************************************************/ CEL *read_cel_file(const char *filename, int read_intensities_only){ CEL *my_CEL; int i,k; my_CEL = Calloc(1, CEL); my_CEL->multichannel = 0; my_CEL->channelnames = NULL; /** First get the header information **/ if (isTextCelFile(filename)){ get_detailed_header_info(filename,&my_CEL->header); } else if (isgzTextCelFile(filename)){ #if defined HAVE_ZLIB gz_get_detailed_header_info(filename,&my_CEL->header); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(filename)){ binary_get_detailed_header_info(filename,&my_CEL->header); } else if (isgzBinaryCelFile(filename)){ gzbinary_get_detailed_header_info(filename,&my_CEL->header); } else if (isGenericCelFile(filename)){ generic_get_detailed_header_info(filename,&my_CEL->header); } else if (isgzGenericCelFile(filename)){ gzgeneric_get_detailed_header_info(filename,&my_CEL->header); } else if (isGenericMultiChannelCelFile(filename)){ generic_get_detailed_header_info(filename,&my_CEL->header); my_CEL->multichannel = multichannel_determine_number_channels(filename); my_CEL->channelnames = Calloc(my_CEL->multichannel,char*); for (k = 0; k < my_CEL->multichannel; k++){ my_CEL->channelnames[k] =multichannel_determine_channel_name(filename, k); } } else if (isgzGenericMultiChannelCelFile(filename)){ gzgeneric_get_detailed_header_info(filename,&my_CEL->header); my_CEL->multichannel = gzmultichannel_determine_number_channels(filename); my_CEL->channelnames = Calloc(my_CEL->multichannel,char*); for (k = 0; k < 
my_CEL->multichannel; k++){ my_CEL->channelnames[k] =gzmultichannel_determine_channel_name(filename, k); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary and gzipped binary\n",filename); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",filename); #endif } /*** Now lets allocate the space for intensities, stdev, npixels ****/ if (!my_CEL->multichannel){ my_CEL->intensities = Calloc(1,double *); my_CEL->intensities[0] = Calloc((my_CEL->header.cols)*(my_CEL->header.rows),double); if (!read_intensities_only){ my_CEL->stddev = Calloc(1,double *); my_CEL->npixels = Calloc(1,double *); my_CEL->stddev[0] = Calloc((my_CEL->header.cols)*(my_CEL->header.rows),double); my_CEL->npixels[0] = Calloc((my_CEL->header.cols)*(my_CEL->header.rows),double); } else { my_CEL->stddev = NULL; my_CEL->npixels = NULL; } } else { my_CEL->intensities = Calloc(my_CEL->multichannel,double *); for (i=0; i < my_CEL->multichannel; i++){ my_CEL->intensities[i] = Calloc((my_CEL->header.cols)*(my_CEL->header.rows),double); } if (!read_intensities_only){ my_CEL->stddev = Calloc(my_CEL->multichannel,double *); my_CEL->npixels = Calloc(my_CEL->multichannel,double *); for (i=0; i < my_CEL->multichannel; i++){ my_CEL->stddev[i] = Calloc((my_CEL->header.cols)*(my_CEL->header.rows),double); my_CEL->npixels[i] = Calloc((my_CEL->header.cols)*(my_CEL->header.rows),double); } } else { my_CEL->stddev = NULL; my_CEL->npixels = NULL; } } if (isTextCelFile(filename)){ read_cel_file_intensities(filename,my_CEL->intensities[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); if (!read_intensities_only){ read_cel_file_stddev(filename,my_CEL->stddev[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); read_cel_file_npixels(filename,my_CEL->npixels[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 
1,my_CEL->header.cols); } } else if (isgzTextCelFile(filename)){ #if defined HAVE_ZLIB read_gzcel_file_intensities(filename,my_CEL->intensities[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); if (!read_intensities_only){ read_gzcel_file_stddev(filename,my_CEL->stddev[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); read_gzcel_file_npixels(filename,my_CEL->npixels[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); } #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(filename)){ if (read_binarycel_file_intensities(filename,my_CEL->intensities[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols)){ error("It appears that the file %s is corrupted.",filename); } if (!read_intensities_only){ read_binarycel_file_stddev(filename,my_CEL->stddev[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); read_binarycel_file_npixels(filename,my_CEL->npixels[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); } } else if (isgzBinaryCelFile(filename)){ if (gzread_binarycel_file_intensities(filename,my_CEL->intensities[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols)){ error("It appears that the file %s is corrupted.",filename); } if (!read_intensities_only){ gzread_binarycel_file_stddev(filename,my_CEL->stddev[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); gzread_binarycel_file_npixels(filename,my_CEL->npixels[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); } } else if (isGenericCelFile(filename)){ read_genericcel_file_intensities(filename,my_CEL->intensities[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); if (!read_intensities_only){ read_genericcel_file_stddev(filename,my_CEL->stddev[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); 
read_genericcel_file_npixels(filename,my_CEL->npixels[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); } } else if (isgzGenericCelFile(filename)){ gzread_genericcel_file_intensities(filename,my_CEL->intensities[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); if (!read_intensities_only){ gzread_genericcel_file_stddev(filename,my_CEL->stddev[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); gzread_genericcel_file_npixels(filename,my_CEL->npixels[0], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols); } } else if (isGenericMultiChannelCelFile(filename)){ for (i=0; i < my_CEL->multichannel; i++){ read_genericcel_file_intensities_multichannel(filename,my_CEL->intensities[i], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols, i); if (!read_intensities_only){ read_genericcel_file_stddev_multichannel(filename,my_CEL->stddev[i], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols, i); read_genericcel_file_npixels_multichannel(filename,my_CEL->npixels[i], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols, i); } } } else if (isgzGenericMultiChannelCelFile(filename)){ for (i=0; i < my_CEL->multichannel; i++){ gzread_genericcel_file_intensities_multichannel(filename,my_CEL->intensities[i], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols, i); if (!read_intensities_only){ gzread_genericcel_file_stddev_multichannel(filename,my_CEL->stddev[i], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols, i); gzread_genericcel_file_npixels_multichannel(filename,my_CEL->npixels[i], 0, (my_CEL->header.cols)*(my_CEL->header.rows), 1,my_CEL->header.cols, i); } } }else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary and gzipped binary\n",filename); #else error("Is %s really a CEL file? tried reading as text and binary. 
The gzipped text and binary formats are not supported on your platform.\n",filename); #endif } /*** Now add masks and outliers ***/ if (!my_CEL->multichannel){ my_CEL->nmasks = Calloc(1, int); my_CEL->noutliers = Calloc(1, int); my_CEL->masks_x = Calloc(1, short *); my_CEL->masks_y = Calloc(1, short *); my_CEL->outliers_x = Calloc(1, short *); my_CEL->outliers_y = Calloc(1, short *); } else { my_CEL->nmasks = Calloc(my_CEL->multichannel, int); my_CEL->noutliers = Calloc(my_CEL->multichannel, int); my_CEL->masks_x = Calloc(my_CEL->multichannel, short *); my_CEL->masks_y = Calloc(my_CEL->multichannel, short *); my_CEL->outliers_x = Calloc(my_CEL->multichannel, short *); my_CEL->outliers_y = Calloc(my_CEL->multichannel, short *); } if (isTextCelFile(filename)){ get_masks_outliers(filename, &(my_CEL->nmasks[0]), &my_CEL->masks_x[0], &my_CEL->masks_y[0], &(my_CEL->noutliers[0]), &my_CEL->outliers_x[0], &my_CEL->outliers_y[0]); } else if (isgzTextCelFile(filename)){ #if defined HAVE_ZLIB gz_get_masks_outliers(filename, &(my_CEL->nmasks[0]), &my_CEL->masks_x[0], &my_CEL->masks_y[0], &(my_CEL->noutliers[0]), &my_CEL->outliers_x[0], &my_CEL->outliers_y[0]); #else error("Compress option not supported on your platform\n"); #endif } else if (isBinaryCelFile(filename)){ binary_get_masks_outliers(filename, &(my_CEL->nmasks[0]), &my_CEL->masks_x[0], &my_CEL->masks_y[0], &(my_CEL->noutliers[0]), &my_CEL->outliers_x[0], &my_CEL->outliers_y[0]); } else if (isgzBinaryCelFile(filename)){ /****************************/ gzbinary_get_masks_outliers(filename, &(my_CEL->nmasks[0]), &my_CEL->masks_x[0], &my_CEL->masks_y[0], &(my_CEL->noutliers[0]), &my_CEL->outliers_x[0], &my_CEL->outliers_y[0]); } else if (isGenericCelFile(filename)){ generic_get_masks_outliers(filename, &(my_CEL->nmasks[0]), &my_CEL->masks_x[0], &my_CEL->masks_y[0], &(my_CEL->noutliers[0]), &my_CEL->outliers_x[0], &my_CEL->outliers_y[0]); } else if (isgzGenericCelFile(filename)){ gzgeneric_get_masks_outliers(filename, 
&(my_CEL->nmasks[0]), &my_CEL->masks_x[0], &my_CEL->masks_y[0], &(my_CEL->noutliers[0]), &my_CEL->outliers_x[0], &my_CEL->outliers_y[0]); } else if (isGenericMultiChannelCelFile(filename)){ for (i=0; i < my_CEL->multichannel; i++){ generic_get_masks_outliers_multichannel(filename, &(my_CEL->nmasks[i]), &my_CEL->masks_x[i], &my_CEL->masks_y[i], &(my_CEL->noutliers[i]), &my_CEL->outliers_x[i], &my_CEL->outliers_y[i], i); } } else if (isgzGenericMultiChannelCelFile(filename)){ for (i=0; i < my_CEL->multichannel; i++){ gzgeneric_get_masks_outliers_multichannel(filename, &(my_CEL->nmasks[i]), &my_CEL->masks_x[i], &my_CEL->masks_y[i], &(my_CEL->noutliers[i]), &my_CEL->outliers_x[i], &my_CEL->outliers_y[i], i); } } else { #if defined HAVE_ZLIB error("Is %s really a CEL file? tried reading as text, gzipped text, binary, gzipped binary, command console and gzipped command console formats.\n",filename); #else error("Is %s really a CEL file? tried reading as text and binary. The gzipped text and binary formats are not supported on your platform.\n",filename); #endif } return my_CEL; } /************************************************************************** ** ** ** Read a single CEL file into an R list structure ** Mostly just for testing the above ** **************************************************************************/ SEXP R_read_cel_file(SEXP filename, SEXP intensities_mean_only){ SEXP theCEL; SEXP theCEL_names; SEXP HEADER; SEXP HEADERnames; SEXP INTENSITIES; SEXP INTENSITIES_CHANNEL; SEXP INTENSITIES_VALUES; SEXP INTENSITIES_STDDEV; SEXP INTENSITIES_NPIXELS; SEXP INTENSITIESnames; SEXP MASKS; SEXP MASKS_CHANNEL; SEXP OUTLIERS; SEXP OUTLIERS_CHANNEL; SEXP MULTICHANNELFLAG; SEXP CHANNELNAMES; SEXP dimnames; SEXP tmp_sexp; int i,k; int read_intensities_only; const char *cur_file_name = CHAR(STRING_ELT(filename,0)); read_intensities_only = INTEGER_POINTER(intensities_mean_only)[0]; CEL *myCEL =read_cel_file(cur_file_name,read_intensities_only); if 
(!myCEL->multichannel){ PROTECT(theCEL = allocVector(VECSXP,4)); } else { PROTECT(theCEL = allocVector(VECSXP,6)); } PROTECT(HEADER = allocVector(VECSXP,9)); PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(myCEL->header.cdfName)); SET_VECTOR_ELT(HEADER,0,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = myCEL->header.cols; /* This is cols */ INTEGER(tmp_sexp)[1] = myCEL->header.rows; /* this is rows */ SET_VECTOR_ELT(HEADER,1,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = myCEL->header.GridCornerULx; INTEGER(tmp_sexp)[1] = myCEL->header.GridCornerULy; SET_VECTOR_ELT(HEADER,2,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = myCEL->header.GridCornerURx; INTEGER(tmp_sexp)[1] = myCEL->header.GridCornerURy; SET_VECTOR_ELT(HEADER,3,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = myCEL->header.GridCornerLRx; INTEGER(tmp_sexp)[1] = myCEL->header.GridCornerLRy; SET_VECTOR_ELT(HEADER,4,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,2)); INTEGER(tmp_sexp)[0] = myCEL->header.GridCornerLLx; INTEGER(tmp_sexp)[1] = myCEL->header.GridCornerLLy; SET_VECTOR_ELT(HEADER,5,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(myCEL->header.DatHeader)); SET_VECTOR_ELT(HEADER,6,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(myCEL->header.Algorithm)); SET_VECTOR_ELT(HEADER,7,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp = allocVector(STRSXP,1)); SET_STRING_ELT(tmp_sexp,0,mkChar(myCEL->header.AlgorithmParameters)); SET_VECTOR_ELT(HEADER,8,tmp_sexp); UNPROTECT(1); SET_VECTOR_ELT(theCEL,0,HEADER); PROTECT(HEADERnames = allocVector(STRSXP,9)); SET_STRING_ELT(HEADERnames,0,mkChar("cdfName")); SET_STRING_ELT(HEADERnames,1,mkChar("CEL dimensions")); 
SET_STRING_ELT(HEADERnames,2,mkChar("GridCornerUL")); SET_STRING_ELT(HEADERnames,3,mkChar("GridCornerUR")); SET_STRING_ELT(HEADERnames,4,mkChar("GridCornerLR")); SET_STRING_ELT(HEADERnames,5,mkChar("GridCornerLL")); SET_STRING_ELT(HEADERnames,6,mkChar("DatHeader")); SET_STRING_ELT(HEADERnames,7,mkChar("Algorithm")); SET_STRING_ELT(HEADERnames,8,mkChar("AlgorithmParameters")); setAttrib(HEADER, R_NamesSymbol, HEADERnames); UNPROTECT(2); if (!myCEL->multichannel){ PROTECT(INTENSITIES = allocVector(VECSXP,3)); PROTECT(INTENSITIES_VALUES = allocVector(REALSXP,(myCEL->header.cols)*(myCEL->header.rows))); if (!read_intensities_only){ PROTECT(INTENSITIES_STDDEV = allocVector(REALSXP,(myCEL->header.cols)*(myCEL->header.rows))); PROTECT(INTENSITIES_NPIXELS = allocVector(REALSXP,(myCEL->header.cols)*(myCEL->header.rows))); } else { INTENSITIES_STDDEV = R_NilValue; INTENSITIES_NPIXELS = R_NilValue; } for (i =0; i < (myCEL->header.cols)*(myCEL->header.rows); i++){ REAL(INTENSITIES_VALUES)[i] = myCEL->intensities[0][i]; if (!read_intensities_only){ REAL(INTENSITIES_STDDEV)[i] = myCEL->stddev[0][i]; REAL(INTENSITIES_NPIXELS)[i] = myCEL->npixels[0][i]; } } SET_VECTOR_ELT(INTENSITIES,0,INTENSITIES_VALUES); SET_VECTOR_ELT(INTENSITIES,1,INTENSITIES_STDDEV); SET_VECTOR_ELT(INTENSITIES,2,INTENSITIES_NPIXELS); if (!read_intensities_only){ UNPROTECT(3); } else { UNPROTECT(1); } PROTECT(INTENSITIESnames=allocVector(STRSXP,3)); SET_STRING_ELT(INTENSITIESnames,0,mkChar("MEAN")); SET_STRING_ELT(INTENSITIESnames,1,mkChar("STDEV")); SET_STRING_ELT(INTENSITIESnames,2,mkChar("NPIXELS")); setAttrib(INTENSITIES, R_NamesSymbol, INTENSITIESnames); UNPROTECT(1); SET_VECTOR_ELT(theCEL,1,INTENSITIES); UNPROTECT(1); PROTECT(MASKS = allocMatrix(INTSXP,myCEL->nmasks[0],2)); for (i =0; i < myCEL->nmasks[0]; i++){ INTEGER(MASKS)[i] = (int)(myCEL->masks_x[0][i]); INTEGER(MASKS)[myCEL->nmasks[0] + i] = (int)(myCEL->masks_y[0][i]); } PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(tmp_sexp = 
allocVector(STRSXP,2)); SET_STRING_ELT(tmp_sexp,0,mkChar("X")); SET_STRING_ELT(tmp_sexp,1,mkChar("Y")); SET_VECTOR_ELT(dimnames,1,tmp_sexp); setAttrib(MASKS, R_DimNamesSymbol, dimnames); UNPROTECT(2); SET_VECTOR_ELT(theCEL,2,MASKS); UNPROTECT(1); PROTECT(OUTLIERS = allocMatrix(INTSXP,myCEL->noutliers[0],2)); for (i =0; i < myCEL->noutliers[0]; i++){ INTEGER(OUTLIERS)[i] = (int)myCEL->outliers_x[0][i]; INTEGER(OUTLIERS)[myCEL->noutliers[0] + i] = (int)myCEL->outliers_y[0][i]; } PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(tmp_sexp = allocVector(STRSXP,2)); SET_STRING_ELT(tmp_sexp,0,mkChar("X")); SET_STRING_ELT(tmp_sexp,1,mkChar("Y")); SET_VECTOR_ELT(dimnames,1,tmp_sexp); setAttrib(OUTLIERS, R_DimNamesSymbol, dimnames); UNPROTECT(2); SET_VECTOR_ELT(theCEL,3,OUTLIERS); UNPROTECT(1); PROTECT(theCEL_names = allocVector(STRSXP,4)); SET_STRING_ELT(theCEL_names,0,mkChar("HEADER")); SET_STRING_ELT(theCEL_names,1,mkChar("INTENSITY")); SET_STRING_ELT(theCEL_names,2,mkChar("MASKS")); SET_STRING_ELT(theCEL_names,3,mkChar("OUTLIERS")); setAttrib(theCEL, R_NamesSymbol,theCEL_names); UNPROTECT(1); } else { PROTECT(INTENSITIES = allocVector(VECSXP,myCEL->multichannel)); for (k=0; k < myCEL->multichannel; k++){ PROTECT(INTENSITIES_CHANNEL = allocVector(VECSXP,3)); PROTECT(INTENSITIES_VALUES = allocVector(REALSXP,(myCEL->header.cols)*(myCEL->header.rows))); if (!read_intensities_only){ PROTECT(INTENSITIES_STDDEV = allocVector(REALSXP,(myCEL->header.cols)*(myCEL->header.rows))); PROTECT(INTENSITIES_NPIXELS = allocVector(REALSXP,(myCEL->header.cols)*(myCEL->header.rows))); } else { INTENSITIES_STDDEV = R_NilValue; INTENSITIES_NPIXELS = R_NilValue; } for (i =0; i < (myCEL->header.cols)*(myCEL->header.rows); i++){ REAL(INTENSITIES_VALUES)[i] = myCEL->intensities[k][i]; if (!read_intensities_only){ REAL(INTENSITIES_STDDEV)[i] = myCEL->stddev[k][i]; REAL(INTENSITIES_NPIXELS)[i] = myCEL->npixels[k][i]; } } SET_VECTOR_ELT(INTENSITIES_CHANNEL,0,INTENSITIES_VALUES); 
SET_VECTOR_ELT(INTENSITIES_CHANNEL,1,INTENSITIES_STDDEV); SET_VECTOR_ELT(INTENSITIES_CHANNEL,2,INTENSITIES_NPIXELS); if (!read_intensities_only){ UNPROTECT(3); } else { UNPROTECT(1); } PROTECT(INTENSITIESnames=allocVector(STRSXP,3)); SET_STRING_ELT(INTENSITIESnames,0,mkChar("MEAN")); SET_STRING_ELT(INTENSITIESnames,1,mkChar("STDEV")); SET_STRING_ELT(INTENSITIESnames,2,mkChar("NPIXELS")); setAttrib(INTENSITIES_CHANNEL, R_NamesSymbol, INTENSITIESnames); UNPROTECT(1); SET_VECTOR_ELT(INTENSITIES,k,INTENSITIES_CHANNEL); UNPROTECT(1); } SET_VECTOR_ELT(theCEL,1,INTENSITIES); UNPROTECT(1); PROTECT(MASKS= allocVector(VECSXP,myCEL->multichannel)); for (k=0; k < myCEL->multichannel; k++){ PROTECT(MASKS_CHANNEL = allocMatrix(INTSXP,myCEL->nmasks[k],2)); for (i =0; i < myCEL->nmasks[k]; i++){ INTEGER(MASKS_CHANNEL)[i] = (int)(myCEL->masks_x[k][i]); INTEGER(MASKS_CHANNEL)[myCEL->nmasks[k] + i] = (int)(myCEL->masks_y[k][i]); } PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(tmp_sexp = allocVector(STRSXP,2)); SET_STRING_ELT(tmp_sexp,0,mkChar("X")); SET_STRING_ELT(tmp_sexp,1,mkChar("Y")); SET_VECTOR_ELT(dimnames,1,tmp_sexp); setAttrib(MASKS_CHANNEL, R_DimNamesSymbol, dimnames); UNPROTECT(2); SET_VECTOR_ELT(MASKS,k,MASKS_CHANNEL); UNPROTECT(1); } SET_VECTOR_ELT(theCEL,2,MASKS); UNPROTECT(1); PROTECT(OUTLIERS= allocVector(VECSXP,myCEL->multichannel)); for (k=0; k < myCEL->multichannel; k++){ PROTECT(OUTLIERS_CHANNEL = allocMatrix(INTSXP,myCEL->noutliers[k],2)); for (i =0; i < myCEL->noutliers[k]; i++){ INTEGER(OUTLIERS_CHANNEL)[i] = (int)myCEL->outliers_x[k][i]; INTEGER(OUTLIERS_CHANNEL)[myCEL->noutliers[k] + i] = (int)myCEL->outliers_y[k][i]; } PROTECT(dimnames = allocVector(VECSXP,2)); PROTECT(tmp_sexp = allocVector(STRSXP,2)); SET_STRING_ELT(tmp_sexp,0,mkChar("X")); SET_STRING_ELT(tmp_sexp,1,mkChar("Y")); SET_VECTOR_ELT(dimnames,1,tmp_sexp); setAttrib(OUTLIERS_CHANNEL, R_DimNamesSymbol, dimnames); UNPROTECT(2); SET_VECTOR_ELT(OUTLIERS,k,OUTLIERS_CHANNEL); UNPROTECT(1); } 
SET_VECTOR_ELT(theCEL,3,OUTLIERS); UNPROTECT(1); PROTECT(MULTICHANNELFLAG=allocVector(LGLSXP,1)); SET_VECTOR_ELT(theCEL,4,MULTICHANNELFLAG); UNPROTECT(1); PROTECT(CHANNELNAMES=allocVector(STRSXP,myCEL->multichannel)); for (k =0; k < myCEL->multichannel; k++){ SET_STRING_ELT(CHANNELNAMES,k,mkChar(myCEL->channelnames[k])); } SET_VECTOR_ELT(theCEL,5,CHANNELNAMES); UNPROTECT(1); PROTECT(theCEL_names = allocVector(STRSXP,6)); SET_STRING_ELT(theCEL_names,0,mkChar("HEADER")); SET_STRING_ELT(theCEL_names,1,mkChar("INTENSITY")); SET_STRING_ELT(theCEL_names,2,mkChar("MASKS")); SET_STRING_ELT(theCEL_names,3,mkChar("OUTLIERS")); SET_STRING_ELT(theCEL_names,4,mkChar("MULTICHANNEL")); SET_STRING_ELT(theCEL_names,5,mkChar("CHANNELNAMES")); setAttrib(theCEL, R_NamesSymbol,theCEL_names); UNPROTECT(1); } Free(myCEL->header.cdfName); Free(myCEL->header.DatHeader); Free(myCEL->header.Algorithm); Free(myCEL->header.AlgorithmParameters); if (!myCEL->multichannel){ Free(myCEL->intensities[0]); if (!read_intensities_only){ Free(myCEL->stddev[0]); Free(myCEL->npixels[0]); } } else { for (k =0; k < myCEL->multichannel; k++){ Free(myCEL->intensities[k]); if (!read_intensities_only){ Free(myCEL->stddev[k]); Free(myCEL->npixels[k]); } } } Free(myCEL->intensities); if (!read_intensities_only){ Free(myCEL->stddev); Free(myCEL->npixels); } if (!myCEL->multichannel){ Free(myCEL->masks_x[0]); Free(myCEL->masks_y[0]); Free(myCEL->outliers_x[0]); Free(myCEL->outliers_y[0]); } else { for (k =0; k < myCEL->multichannel; k++){ Free(myCEL->masks_x[k]); Free(myCEL->masks_y[k]); Free(myCEL->outliers_x[k]); Free(myCEL->outliers_y[k]); } } Free(myCEL->masks_x); Free(myCEL->masks_y); Free(myCEL->outliers_x); Free(myCEL->outliers_y); Free(myCEL); UNPROTECT(1); return theCEL; } affyio/src/read_abatch.h0000644000175400017540000000214713556116171016244 0ustar00biocbuildbiocbuild#ifndef READ_ABATCH_H #define READ_ABATCH_H /**************************************************************** ** ** A structure for 
holding full header information
 **
 ** One instance describes the header of a single CEL file. The four
 ** char* members are heap strings owned by whoever fills the structure.
 **
 ***************************************************************/

typedef struct{
  char *cdfName;            /* name of the CDF (chip type) the CEL file refers to */
  int cols;                 /* number of columns on the chip */
  int rows;                 /* number of rows on the chip */
  int GridCornerULx,GridCornerULy;  /* XY coordinates of the upper left grid corner in pixel coordinates.*/
  int GridCornerURx,GridCornerURy;  /* XY coordinates of the upper right grid corner in pixel coordinates.*/
  int GridCornerLRx,GridCornerLRy;  /* XY coordinates of the lower right grid corner in pixel coordinates.*/
  int GridCornerLLx,GridCornerLLy;  /* XY coordinates of the lower left grid corner in pixel coordinates.*/
  char *DatHeader;          /* DatHeader string of the CEL file */
  char *Algorithm;          /* Algorithm string of the CEL file */
  char *AlgorithmParameters;  /* AlgorithmParameters string of the CEL file */
  char *ScanDate;           /* presumably the scan date/time as text -- TODO confirm against the code that fills it */
} detailed_header_info;

/* Entry points called from R; their definitions are not part of this header.
   NOTE(review): argument semantics are inferred from the parameter names only. */
SEXP read_abatch(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, SEXP ref_cdfName, SEXP ref_dim, SEXP verbose);
SEXP read_abatch_stddev(SEXP filenames, SEXP rm_mask, SEXP rm_outliers, SEXP rm_extra, SEXP ref_cdfName, SEXP ref_dim, SEXP verbose);

#endif
affyio/src/read_bpmap.c0000644000175400017540000005676713556116171016135 0ustar00biocbuildbiocbuild/****************************************************************
 **
 ** File: read_bpmap.c
 **
 ** Implementation by: B. M. Bolstad
 **
 ** Copyright (C) B. M. Bolstad 2006-2007
 **
 ** A parser designed to read bpmap files into an R List structure
 **
 ** History
 ** Mar 11, 2006 - Initial version
 ** Mar 12, 2006 - add additional support for versions 2 and 3
 ** May 31, 2006 - Fix some compiler warnings
 ** June 12, 2006 - fix naming vector length issue.
 ** June 12, 2007 - much wailing and grinding of teeth, but finally a fix for reading version number right.
** Aug 25, 2007 - Move file reading functions to centralized location ** Mar 14, 2008 - Fix reading of version number for big endian platforms ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues ** *******************************************************************/ #include #include #include "stdlib.h" #include "stdio.h" #include "fread_functions.h" /**************************************************************** ** ** ** ** ** Note BPMAP files are stored in big endian format ** *******************************************************************/ /************************************************************************* ** ** Code for reading from the big endian binary files, doing bit flipping if ** necessary (on little-endian machines) ** ** ************************************************************************/ static void swap_float_4(float *tnf4) /* 4 byte floating point numbers */ { int tni = (int)(*tnf4); tni=(((tni>>24)&0xff) | ((tni&0xff)<<24) | ((tni>>8)&0xff00) | ((tni&0xff00)<<8)); *tnf4 = (float)tni; } static SEXP ReadBPMAPHeader(FILE *infile){ SEXP Header; SEXP tmpSXP; char *Magicnumber = R_alloc(8,sizeof(char)); float version_number = 0.0; unsigned int unsigned_version_number_int; unsigned int n_seq; static double new_version_number; fread_be_char(Magicnumber,8,infile); if (strncmp(Magicnumber,"PHT7",4) !=0){ error("Based on the magic number which was %s, this does not appear to be a BPMAP file",Magicnumber); } /* version number is a little bit funky need to do some funny things to coax it into the right format */ /* cast to integer, swap bytes, cast to float */ /* fread_be_float32(&version_number,1,infile); */ fread_float32(&version_number,1,infile); swap_float_4(&version_number); new_version_number = (double)version_number; /* // Rprintf("A %f\n",version_number);*/ if ((version_number <=0.5) || (version_number > 3.5)){ /* // Rprintf("Rereading\n"); */ fseek(infile,-sizeof(float),SEEK_CUR); fread_be_uint32(&unsigned_version_number_int,1,infile); 
memcpy(&version_number,&unsigned_version_number_int, sizeof(float)); new_version_number = (double)version_number; } fread_be_uint32(&n_seq,1,infile); PROTECT(Header=allocVector(VECSXP,3)); PROTECT(tmpSXP=allocVector(STRSXP,1)); SET_STRING_ELT(tmpSXP,0,mkChar(Magicnumber)); SET_VECTOR_ELT(Header,0,tmpSXP); UNPROTECT(1); PROTECT(tmpSXP=allocVector(REALSXP,1)); REAL(tmpSXP)[0] = (double)new_version_number; SET_VECTOR_ELT(Header,1,tmpSXP); UNPROTECT(1); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = (int)n_seq; SET_VECTOR_ELT(Header,2,tmpSXP); UNPROTECT(1); PROTECT(tmpSXP=allocVector(STRSXP,3)); SET_STRING_ELT(tmpSXP,0,mkChar("magic.number")); SET_STRING_ELT(tmpSXP,1,mkChar("version")); SET_STRING_ELT(tmpSXP,2,mkChar("n.seq")); setAttrib(Header,R_NamesSymbol,tmpSXP); UNPROTECT(2); /* Rprintf("D %f %f\n",version_number,new_version_number); */ return Header; } static SEXP ReadBPMAPSeqDescription(FILE *infile, float version, int nseq){ SEXP SequenceDescriptionList; SEXP CurSequenceDescription = R_NilValue; SEXP tmpSXP,tmpSXP2; int i,j; unsigned int seq_name_length; char *seq_name; unsigned int probe_mapping_type; unsigned int seq_file_offset; unsigned int n_probes; unsigned int group_name_length; char *group_name; unsigned int version_number_length; char *version_number; unsigned int number_parameters; unsigned int param_length; char *param_name; /* Rprintf("%f %d\n",version,nseq); */ PROTECT(SequenceDescriptionList=allocVector(VECSXP,(int)nseq)); for (i=0; i < nseq; i++){ fread_be_uint32(&seq_name_length,1,infile); seq_name = (char *)Calloc(seq_name_length+1,char); fread_be_char(seq_name,seq_name_length,infile); if (version == 3.00){ PROTECT(CurSequenceDescription=allocVector(VECSXP,8)); PROTECT(tmpSXP=allocVector(STRSXP,7)); SET_STRING_ELT(tmpSXP,0,mkChar("Name")); SET_STRING_ELT(tmpSXP,1,mkChar("ProbeMappingType")); SET_STRING_ELT(tmpSXP,2,mkChar("SequenceFileOffset")); SET_STRING_ELT(tmpSXP,3,mkChar("n.probepairs")); 
SET_STRING_ELT(tmpSXP,4,mkChar("GroupName")); SET_STRING_ELT(tmpSXP,5,mkChar("VersionNumber")); SET_STRING_ELT(tmpSXP,6,mkChar("NumberOfParameters")); SET_STRING_ELT(tmpSXP,7,mkChar("Parameters")); setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP); UNPROTECT(1); } else if (version == 2.00){ PROTECT(CurSequenceDescription=allocVector(VECSXP,6)); PROTECT(tmpSXP=allocVector(STRSXP,6)); SET_STRING_ELT(tmpSXP,0,mkChar("Name")); SET_STRING_ELT(tmpSXP,1,mkChar("n.probepairs")); SET_STRING_ELT(tmpSXP,2,mkChar("GroupName")); SET_STRING_ELT(tmpSXP,3,mkChar("VersionNumber")); SET_STRING_ELT(tmpSXP,4,mkChar("NumberOfParameters")); SET_STRING_ELT(tmpSXP,5,mkChar("Parameters")); setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP); UNPROTECT(1); } else if (version == 1.00){ PROTECT(CurSequenceDescription=allocVector(VECSXP,2)); PROTECT(tmpSXP=allocVector(STRSXP,2)); SET_STRING_ELT(tmpSXP,0,mkChar("Name")); SET_STRING_ELT(tmpSXP,1,mkChar("n.probepairs")); setAttrib(CurSequenceDescription,R_NamesSymbol,tmpSXP); UNPROTECT(1); } PROTECT(tmpSXP=allocVector(STRSXP,1)); SET_STRING_ELT(tmpSXP,0,mkChar(seq_name)); SET_VECTOR_ELT(CurSequenceDescription,0,tmpSXP); UNPROTECT(1); Free(seq_name); if (version == 1.0){ fread_be_uint32(&n_probes,1,infile); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = n_probes; SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP); UNPROTECT(1); } else if (version ==2.0){ fread_be_uint32(&n_probes,1,infile); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = n_probes; SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP); UNPROTECT(1); fread_be_uint32(&group_name_length,1,infile); group_name = (char *)Calloc(group_name_length+1,char); fread_be_char(group_name,group_name_length,infile); PROTECT(tmpSXP=allocVector(STRSXP,1)); SET_STRING_ELT(tmpSXP,0,mkChar(group_name)); SET_VECTOR_ELT(CurSequenceDescription,2,tmpSXP); UNPROTECT(1); Free(group_name); fread_be_uint32(&version_number_length,1,infile); version_number = (char 
*)Calloc(version_number_length+1,char); fread_be_char(version_number,version_number_length,infile); PROTECT(tmpSXP=allocVector(STRSXP,1)); SET_STRING_ELT(tmpSXP,0,mkChar(version_number)); SET_VECTOR_ELT(CurSequenceDescription,3,tmpSXP); UNPROTECT(1); Free(version_number); fread_be_uint32(&number_parameters,1,infile); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = number_parameters; SET_VECTOR_ELT(CurSequenceDescription,4,tmpSXP); UNPROTECT(1); PROTECT(tmpSXP=allocVector(VECSXP,number_parameters)); for (j=0; j < number_parameters; j++){ PROTECT(tmpSXP2 = allocVector(STRSXP,2)); fread_be_uint32(¶m_length,1,infile); param_name = (char *)Calloc(param_length+1,char); fread_be_char(param_name,param_length,infile); SET_STRING_ELT(tmpSXP2,0,mkChar(param_name)); Free(param_name); fread_be_uint32(¶m_length,1,infile); param_name = (char *)Calloc(param_length+1,char); fread_be_char(param_name,param_length,infile); SET_STRING_ELT(tmpSXP2,1,mkChar(param_name)); Free(param_name); SET_VECTOR_ELT(tmpSXP,j,tmpSXP2); UNPROTECT(1); } SET_VECTOR_ELT(CurSequenceDescription,5,tmpSXP); UNPROTECT(1); } else if (version ==3.0){ fread_be_uint32(&probe_mapping_type,1,infile); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = probe_mapping_type; SET_VECTOR_ELT(CurSequenceDescription,1,tmpSXP); UNPROTECT(1); fread_be_uint32(&seq_file_offset,1,infile); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = seq_file_offset; SET_VECTOR_ELT(CurSequenceDescription,2,tmpSXP); UNPROTECT(1); fread_be_uint32(&n_probes,1,infile); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = n_probes; SET_VECTOR_ELT(CurSequenceDescription,3,tmpSXP); UNPROTECT(1); fread_be_uint32(&group_name_length,1,infile); group_name = (char *)Calloc(group_name_length+1,char); fread_be_char(group_name,group_name_length,infile); PROTECT(tmpSXP=allocVector(STRSXP,1)); SET_STRING_ELT(tmpSXP,0,mkChar(group_name)); SET_VECTOR_ELT(CurSequenceDescription,4,tmpSXP); UNPROTECT(1); Free(group_name); 
fread_be_uint32(&version_number_length,1,infile); version_number = (char *)Calloc(version_number_length+1,char); fread_be_char(version_number,version_number_length,infile); PROTECT(tmpSXP=allocVector(STRSXP,1)); SET_STRING_ELT(tmpSXP,0,mkChar(version_number)); SET_VECTOR_ELT(CurSequenceDescription,5,tmpSXP); UNPROTECT(1); Free(version_number); fread_be_uint32(&number_parameters,1,infile); PROTECT(tmpSXP=allocVector(INTSXP,1)); INTEGER(tmpSXP)[0] = number_parameters; SET_VECTOR_ELT(CurSequenceDescription,6,tmpSXP); UNPROTECT(1); PROTECT(tmpSXP=allocVector(VECSXP,number_parameters)); for (j=0; j < number_parameters; j++){ PROTECT(tmpSXP2 = allocVector(STRSXP,2)); fread_be_uint32(¶m_length,1,infile); param_name = (char *)Calloc(param_length+1,char); fread_be_char(param_name,param_length,infile); SET_STRING_ELT(tmpSXP2,0,mkChar(param_name)); Free(param_name); fread_be_uint32(¶m_length,1,infile); param_name = (char *)Calloc(param_length+1,char); fread_be_char(param_name,param_length,infile); SET_STRING_ELT(tmpSXP2,1,mkChar(param_name)); Free(param_name); SET_VECTOR_ELT(tmpSXP,j,tmpSXP2); UNPROTECT(1); } SET_VECTOR_ELT(CurSequenceDescription,7,tmpSXP); UNPROTECT(1); } SET_VECTOR_ELT(SequenceDescriptionList,i,CurSequenceDescription); UNPROTECT(1); } UNPROTECT(1); return SequenceDescriptionList; } static void packedSeqTobaseStr(unsigned char probeseq[7], char *dest){ unsigned char currentchar; unsigned char firsttwobits; unsigned char secondtwobits; unsigned char thirdtwobits; unsigned char fourthtwobits; int i; /* Rprintf("\n\n\n\n\n"); */ for (i =0; i < 6;i++){ currentchar = probeseq[i]; /* extract first two bits */ firsttwobits = (currentchar & 192); secondtwobits = (currentchar & 48); thirdtwobits = (currentchar & 12); fourthtwobits = (currentchar & 3); firsttwobits = firsttwobits >> 6; secondtwobits = secondtwobits >> 4; thirdtwobits = thirdtwobits >> 2; /* Rprintf("%x %x %x %x\n",firsttwobits,secondtwobits,thirdtwobits,fourthtwobits); */ if (firsttwobits == 0){ 
dest[4*i +0]='A'; } if (firsttwobits == 1){ dest[4*i +0]='C'; } if (firsttwobits == 2){ dest[4*i +0]='G'; } if (firsttwobits == 3){ dest[4*i +0]='T'; } if (secondtwobits == 0){ dest[4*i +1]='A'; } if (secondtwobits == 1){ dest[4*i +1]='C'; } if (secondtwobits == 2){ dest[4*i +1]='G'; } if (secondtwobits == 3){ dest[4*i +1]='T'; } if (thirdtwobits == 0){ dest[4*i +2]='A'; } if (thirdtwobits == 1){ dest[4*i +2]='C'; } if (thirdtwobits == 2){ dest[4*i +2]='G'; } if (thirdtwobits == 3){ dest[4*i +2]='T'; } if (fourthtwobits == 0){ dest[4*i +3]='A'; } if (fourthtwobits == 1){ dest[4*i +3]='C'; } if (fourthtwobits == 2){ dest[4*i +3]='G'; } if (fourthtwobits == 3){ dest[4*i +3]='T'; } /* Rprintf("%c%c%c%c\n",dest[4*i],dest[4*i +1],dest[4*i +2], dest[4*i +3]); */ } currentchar = probeseq[6]; /* extract first two bits */ firsttwobits = (currentchar & 192); firsttwobits = firsttwobits >> 6; if (firsttwobits == 0){ dest[24]='A'; } if (firsttwobits == 1){ dest[24]='C'; } if (firsttwobits == 2){ dest[24]='G'; } if (firsttwobits == 3){ dest[24]='T'; } } static SEXP readBPMAPSeqIdPositionInfo(FILE *infile, float version, int nseq, SEXP seqDesc){ SEXP SeqIdPositionInfoList; SEXP curSeqIdPositionInfo; SEXP PositionInfo= R_NilValue; SEXP PositionInfoRowNames; SEXP tmpSEXP; SEXP xPM= R_NilValue,yPM= R_NilValue,xMM= R_NilValue,yMM= R_NilValue; SEXP PMprobeLength= R_NilValue; SEXP probeSeqString= R_NilValue; SEXP MatchScore= R_NilValue; SEXP PMposition= R_NilValue; SEXP Strand= R_NilValue; char buf[10]; char *dest; int nprobes=0; int probe_mapping_type=0; int i,j; unsigned int SeqId; unsigned int x; unsigned int y; unsigned int x_mm; unsigned int y_mm; unsigned char probelength; unsigned char probeseq[7]; float matchScore; int matchScore_int; unsigned int positionPM; unsigned char strand; PROTECT(SeqIdPositionInfoList = allocVector(VECSXP,nseq)); for (i =0; i < nseq; i++){ fread_be_uint32(&SeqId,1,infile); /*Rprintf("Seq id:%u\n",SeqId);*/ PROTECT(curSeqIdPositionInfo = 
allocVector(VECSXP,2)); PROTECT(tmpSEXP=allocVector(INTSXP,1)); INTEGER(tmpSEXP)[0] = (int)SeqId; SET_VECTOR_ELT(curSeqIdPositionInfo,0,tmpSEXP); UNPROTECT(1); PROTECT(tmpSEXP=allocVector(STRSXP,2)); SET_STRING_ELT(tmpSEXP,0,mkChar("Header")); SET_STRING_ELT(tmpSEXP,1,mkChar("PositionInformation")); setAttrib(curSeqIdPositionInfo,R_NamesSymbol,tmpSEXP); UNPROTECT(1); if ((version == 1.0) || (version == 2.0)){ nprobes = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),1))[0]; /* Rprintf("nprobes: %d\n",nprobes); */ probe_mapping_type = 0; /* PM/MM tiling */ PROTECT(PositionInfo = allocVector(VECSXP,9)); PROTECT(xPM = allocVector(INTSXP,nprobes)); PROTECT(yPM = allocVector(INTSXP,nprobes)); PROTECT(xMM = allocVector(INTSXP,nprobes)); PROTECT(yMM = allocVector(INTSXP,nprobes)); PROTECT(PMprobeLength = allocVector(INTSXP,nprobes)); PROTECT(probeSeqString = allocVector(STRSXP,nprobes)); PROTECT(MatchScore = allocVector(REALSXP,nprobes)); PROTECT(PMposition = allocVector(INTSXP,nprobes)); PROTECT(Strand = allocVector(STRSXP,nprobes)); SET_VECTOR_ELT(PositionInfo,0,xPM); SET_VECTOR_ELT(PositionInfo,1,yPM); SET_VECTOR_ELT(PositionInfo,2,xMM); SET_VECTOR_ELT(PositionInfo,3,yMM); SET_VECTOR_ELT(PositionInfo,4,PMprobeLength); SET_VECTOR_ELT(PositionInfo,5,probeSeqString); SET_VECTOR_ELT(PositionInfo,6,MatchScore); SET_VECTOR_ELT(PositionInfo,7,PMposition); SET_VECTOR_ELT(PositionInfo,8,Strand); UNPROTECT(9); setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame")); PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes)); for (j=0; j < nprobes; j++){ sprintf(buf, "%d", j+1); SET_STRING_ELT(PositionInfoRowNames,j,mkChar(buf)); } setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames); UNPROTECT(1); PROTECT(tmpSEXP = allocVector(STRSXP,9)); SET_STRING_ELT(tmpSEXP,0,mkChar("x")); SET_STRING_ELT(tmpSEXP,1,mkChar("y")); SET_STRING_ELT(tmpSEXP,2,mkChar("x.mm")); SET_STRING_ELT(tmpSEXP,3,mkChar("y.mm")); SET_STRING_ELT(tmpSEXP,4,mkChar("PMLength")); 
SET_STRING_ELT(tmpSEXP,5,mkChar("ProbeSeq")); SET_STRING_ELT(tmpSEXP,6,mkChar("MatchScore")); SET_STRING_ELT(tmpSEXP,7,mkChar("PMPosition")); SET_STRING_ELT(tmpSEXP,8,mkChar("TargetStrand")); setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP); UNPROTECT(1); } else if (version == 3.0){ nprobes = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),3))[0]; probe_mapping_type = INTEGER(VECTOR_ELT(VECTOR_ELT(seqDesc,i),1))[0]; if (probe_mapping_type == 0){ PROTECT(PositionInfo = allocVector(VECSXP,9)); PROTECT(xPM = allocVector(INTSXP,nprobes)); PROTECT(yPM = allocVector(INTSXP,nprobes)); PROTECT(xMM = allocVector(INTSXP,nprobes)); PROTECT(yMM = allocVector(INTSXP,nprobes)); PROTECT(PMprobeLength = allocVector(INTSXP,nprobes)); PROTECT(probeSeqString = allocVector(STRSXP,nprobes)); PROTECT(MatchScore = allocVector(REALSXP,nprobes)); PROTECT(PMposition = allocVector(INTSXP,nprobes)); PROTECT(Strand = allocVector(STRSXP,nprobes)); SET_VECTOR_ELT(PositionInfo,0,xPM); SET_VECTOR_ELT(PositionInfo,1,yPM); SET_VECTOR_ELT(PositionInfo,2,xMM); SET_VECTOR_ELT(PositionInfo,3,yMM); SET_VECTOR_ELT(PositionInfo,4,PMprobeLength); SET_VECTOR_ELT(PositionInfo,5,probeSeqString); SET_VECTOR_ELT(PositionInfo,6,MatchScore); SET_VECTOR_ELT(PositionInfo,7,PMposition); SET_VECTOR_ELT(PositionInfo,8,Strand); UNPROTECT(9); setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame")); PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes)); for (j=0; j < nprobes; j++){ sprintf(buf, "%d", j+1); SET_VECTOR_ELT(PositionInfoRowNames,j,mkChar(buf)); } setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames); UNPROTECT(1); PROTECT(tmpSEXP = allocVector(STRSXP,9)); SET_STRING_ELT(tmpSEXP,0,mkChar("x")); SET_STRING_ELT(tmpSEXP,1,mkChar("y")); SET_STRING_ELT(tmpSEXP,2,mkChar("x.mm")); SET_STRING_ELT(tmpSEXP,3,mkChar("y.mm")); SET_STRING_ELT(tmpSEXP,4,mkChar("PMLength")); SET_STRING_ELT(tmpSEXP,5,mkChar("ProbeSeq")); SET_STRING_ELT(tmpSEXP,6,mkChar("MatchScore")); SET_STRING_ELT(tmpSEXP,7,mkChar("PMPosition")); 
SET_STRING_ELT(tmpSEXP,8,mkChar("TargetStrand")); setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP); UNPROTECT(1); } else { PROTECT(PositionInfo = allocVector(VECSXP,7)); PROTECT(xPM = allocVector(INTSXP,nprobes)); PROTECT(yPM = allocVector(INTSXP,nprobes)); PROTECT(PMprobeLength = allocVector(INTSXP,nprobes)); PROTECT(probeSeqString = allocVector(STRSXP,nprobes)); PROTECT(MatchScore = allocVector(REALSXP,nprobes)); PROTECT(PMposition = allocVector(INTSXP,nprobes)); PROTECT(Strand = allocVector(STRSXP,nprobes)); SET_VECTOR_ELT(PositionInfo,0,xPM); SET_VECTOR_ELT(PositionInfo,1,yPM); SET_VECTOR_ELT(PositionInfo,2,PMprobeLength); SET_VECTOR_ELT(PositionInfo,3,probeSeqString); SET_VECTOR_ELT(PositionInfo,4,MatchScore); SET_VECTOR_ELT(PositionInfo,5,PMposition); SET_VECTOR_ELT(PositionInfo,6,Strand); UNPROTECT(7); setAttrib(PositionInfo,R_ClassSymbol,mkString("data.frame")); PROTECT(PositionInfoRowNames = allocVector(STRSXP,nprobes)); for (j=0; j < nprobes; j++){ sprintf(buf, "%d", j+1); SET_STRING_ELT(PositionInfoRowNames,j,mkChar(buf)); } setAttrib(PositionInfo, R_RowNamesSymbol, PositionInfoRowNames); UNPROTECT(1); PROTECT(tmpSEXP = allocVector(STRSXP,7)); SET_STRING_ELT(tmpSEXP,0,mkChar("x")); SET_STRING_ELT(tmpSEXP,1,mkChar("y")); SET_STRING_ELT(tmpSEXP,2,mkChar("PMLength")); SET_STRING_ELT(tmpSEXP,3,mkChar("ProbeSeq")); SET_STRING_ELT(tmpSEXP,4,mkChar("MatchScore")); SET_STRING_ELT(tmpSEXP,5,mkChar("PMPosition")); SET_STRING_ELT(tmpSEXP,6,mkChar("TargetStrand")); setAttrib(PositionInfo,R_NamesSymbol,tmpSEXP); UNPROTECT(1); } } for (j=0; j < nprobes; j++){ fread_be_uint32(&x,1,infile); fread_be_uint32(&y,1,infile); /* Rprintf("x y :%u %u\n",x,y); */ if (probe_mapping_type == 0){ fread_be_uint32(&x_mm,1,infile); fread_be_uint32(&y_mm,1,infile); } /* Rprintf("mm x y :%u %u\n",x_mm,y_mm); */ INTEGER(xPM)[j] = x; INTEGER(yPM)[j] = y; if (probe_mapping_type == 0){ INTEGER(xMM)[j] = x_mm; INTEGER(yMM)[j] = y_mm; } fread_be_uchar(&probelength,1,infile); /* 
Rprintf("probelength : %d\n",(int)probelength);*/ INTEGER(PMprobeLength)[j] = probelength; fread_be_uchar(probeseq,7,infile); /* Rprintf("probeseq : %s\n",probeseq); */ dest = (char *)Calloc(25+1,char); packedSeqTobaseStr(probeseq,dest); SET_STRING_ELT(probeSeqString,j,mkChar(dest)); Free(dest); /* matchScore is treated same as version number in header */ #ifdef WORDS_BIGENDIAN /* swap, cast to integer, swap bytes and cast back to float */ fread_be_float32(&matchScore,1,infile); swap_float_4(&matchScore); matchScore_int = (int)matchScore; matchScore_int=(((matchScore_int>>24)&0xff) | ((matchScore_int&0xff)<<24) | ((matchScore_int>>8)&0xff00) | ((matchScore_int&0xff00)<<8)); matchScore = (float)matchScore_int; #else /* cast to integer, swap bytes, cast to float */ fread_float32(&matchScore,1,infile); matchScore_int = (int)matchScore; matchScore_int=(((matchScore_int>>24)&0xff) | ((matchScore_int&0xff)<<24) | ((matchScore_int>>8)&0xff00) | ((matchScore_int&0xff00)<<8)); matchScore = (float)matchScore_int; #endif /* Rprintf("matchScore : %f\n",matchScore); */ REAL(MatchScore)[j] = matchScore; fread_be_uint32(&positionPM,1,infile); /* Rprintf("positionPM : %u\n",positionPM);*/ INTEGER(PMposition)[j] = positionPM; fread_be_uchar(&strand,1,infile); /* Rprintf("strand: %d\n",(int)strand);*/ if ((int)strand ==1){ SET_STRING_ELT(Strand,j,mkChar("F")); } else { SET_STRING_ELT(Strand,j,mkChar("R")); } } SET_VECTOR_ELT(curSeqIdPositionInfo,1,PositionInfo); UNPROTECT(1); SET_VECTOR_ELT(SeqIdPositionInfoList,i,curSeqIdPositionInfo); UNPROTECT(1); } UNPROTECT(1); return SeqIdPositionInfoList; } SEXP ReadBPMAPFileIntoRList(SEXP filename){ SEXP bpmapRlist; SEXP bpmapHeader; SEXP bpmapSeqDesc; SEXP tmpSXP; FILE *infile; int n_seq; float version; const char *cur_file_name; cur_file_name = CHAR(STRING_ELT(filename,0)); if ((infile = fopen(cur_file_name, "rb")) == NULL) { error("Unable to open the file %s",filename); } /* first element is header, second item is sequence descriptions 
third item is sequence header/position information */ PROTECT(bpmapRlist = allocVector(VECSXP,3)); PROTECT(bpmapHeader = ReadBPMAPHeader(infile)); SET_VECTOR_ELT(bpmapRlist,0,bpmapHeader); version = REAL(VECTOR_ELT(bpmapHeader,1))[0]; n_seq = INTEGER(VECTOR_ELT(bpmapHeader,2))[0]; UNPROTECT(1); /* Rprintf("version nseq: %f %d\n", version, n_seq); */ PROTECT(bpmapSeqDesc = ReadBPMAPSeqDescription(infile,version,n_seq)); SET_VECTOR_ELT(bpmapRlist,1,bpmapSeqDesc); SET_VECTOR_ELT(bpmapRlist,2,readBPMAPSeqIdPositionInfo(infile,version,n_seq,bpmapSeqDesc)); UNPROTECT(1); PROTECT(tmpSXP=allocVector(STRSXP,3)); SET_STRING_ELT(tmpSXP,0,mkChar("Header")); SET_STRING_ELT(tmpSXP,1,mkChar("SequenceDescription")); SET_STRING_ELT(tmpSXP,2,mkChar("SeqHead.PosInfo")); setAttrib(bpmapRlist,R_NamesSymbol,tmpSXP); UNPROTECT(1); UNPROTECT(1); return bpmapRlist; } affyio/src/read_cdf_xda.c0000644000175400017540000012415313556116171016407 0ustar00biocbuildbiocbuild/**************************************************************** ** ** File: read_cdf_xda.c ** ** Implementation by: B. M. Bolstad ** ** A parser designed to read the binary format cdf files. ** Sometimes called the xda format. ** ** Implemented based on documentation available from Affymetrix ** ** Implementation begun 2005. ** ** Modification Dates ** Feb 4 - Initial version ** Feb 5 - A bunch of hacks for SNP chips. 
** Apr 20 ** Aug 16, 2005 - Fix potential big endian bug ** Sep 22, 2005 - Fix some signed/unsigned bugs ** Dec 1, 2005 - Comment cleaning ** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers ** May 31, 2006 - fix some compiler warnings ** Aug 23, 2006 - fix a potential (but at current time non-existant) problem ** when there are 0 qcunits or 0 units ** Aug 25, 2007 - Move file reading functions to centralized location ** Oct 27, 2007 - When building a cdfenv set NON identified values to NA (mostly affects MM for PM only arrays) ** Nov 12, 2008 - Fix crash ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues ** ****************************************************************/ /** --- includes --- */ #include #include #include "stdlib.h" #include "stdio.h" #include "fread_functions.h" #include /* #define READ_CDF_DEBUG */ /* #define READ_CDF_DEBUG_SNP */ #define READ_CDF_NOSNP /************************************************************************ ** ** Structures for holding the CDF file information. 
Basically
 ** header/general information that appears at the start of the CDF file
 **
 ************************************************************************/

typedef struct {
  int magicnumber;            /* read_cdf_xda() only accepts the value 67 */
  int version_number;         /* read_cdf_xda() only accepts the value 1 */
  unsigned short rows,cols;   /* array dimensions; read_cdf_xda() reads cols first, then rows */
  int n_units,n_qc_units;     /* number of units (probesets) and number of QC units */
  int len_ref_seq;            /* number of chars read into ref_seq */
  int i;                      /* NOTE(review): not referenced by any code visible here - confirm it is still needed */
  char *ref_seq;              /* resequencing reference sequence, len_ref_seq chars */
} cdf_xda_header;

/****************************************************************************
 **
 ** The following two structures store QC units and QC unit probe information
 **
 ** QC information, repeated for each QC unit:
 **   Type             - unsigned short
 **   Number of probes - integer
 **
 ** Probe information, repeated for each probe in the QC unit:
 **   X coordinate          - unsigned short
 **   Y coordinate          - unsigned short
 **   Probe length          - unsigned char
 **   Perfect match flag    - unsigned char
 **   Background probe flag - unsigned char
 **
 ****************************************************************************/

/* One probe belonging to a QC unit; fields are in on-disk order. */
typedef struct{
  unsigned short x;
  unsigned short y;
  unsigned char probelength;
  unsigned char pmflag;
  unsigned char bgprobeflag;
} cdf_qc_probe;

/* One QC unit: its type code plus an array of n_probes probes. */
typedef struct{
  unsigned short type;
  unsigned int n_probes;      /* length of qc_probes */
  cdf_qc_probe *qc_probes;    /* allocated by read_cdf_qcunit() */
} cdf_qc_unit;

/****************************************************************************
 **
 ** The following three structures store information for units (sometimes called
 ** probesets), blocks (of which there are one or more within a unit) and cells
 ** (sometimes called probes), of which there are one or more within each block
 **
 **
 ** Unit information, repeated for each unit:
 **
 **   UnitType - unsigned short (1 - expression, 2 - genotyping, 3 - CustomSeq, 3 - tag)
 **              (the second "3" is presumably a typo for 4 - TODO confirm
 **               against the Affymetrix file format documentation)
 **   Direction - unsigned char
 **   Number of atoms - integer
 **   Number of blocks - integer (always 1 for expression units)
 **   Number of cells - integer
 **   Unit number (probe set number) - integer
 **   Number of cells per atom - unsigned char
 **
 **
 ** Block information, repeated for each block in the unit:
 **
 **   Number of atoms - integer
 **   Number of cells - integer
 **   Number of cells per atom - unsigned char
 **   Direction - unsigned char
 **   The position of the first atom - integer
 **   Unused value (see the note on the "unused" field below) - integer
 **   The block name - char[64]
 **
 **
 ** Cell information, repeated for each cell in the block:
 **
 **   Atom number - integer
 **   X coordinate - unsigned short
 **   Y coordinate - unsigned short
 **   Index position (relative to sequence for resequencing units, for
 **     expression and mapping units this value is just the atom number) - integer
 **   Base of probe at substitution position - char
 **   Base of target at interrogation position - char
 **
 ****************************************************************************/

/* One cell (probe) of a block; fields are in on-disk order. */
typedef struct{
  int atomnumber;
  unsigned short x;
  unsigned short y;
  int indexpos;
  char pbase;                 /* probe base; compared with tbase by isPM() */
  char tbase;                 /* target base */
} cdf_unit_cell;

/* One block of a unit: header fields followed by ncells cells. */
typedef struct{
  int natoms;
  int ncells;                 /* length of unit_cells */
  unsigned char ncellperatom;
  unsigned char direction;
  int firstatom;
  int unused; /* in the docs this is called "unused" but by the looks of it it is actually the lastatom */
  char blockname[64];
  cdf_unit_cell *unit_cells;  /* allocated by read_cdf_unit() */
} cdf_unit_block;

/* One unit (probeset): header fields followed by nblocks blocks. */
typedef struct{
  unsigned short unittype;    /* ReadCDFFile() handles 1 (expression) and 2 (genotyping) */
  unsigned char direction;
  int natoms;
  int nblocks;                /* length of unit_block */
  int ncells;
  int unitnumber;
  unsigned char ncellperatom;
  cdf_unit_block *unit_block; /* allocated by read_cdf_unit() */
} cdf_unit;

/****************************************************************************
 **
 ** A data structure for holding CDF information read from a xda format cdf file
 **
 ** note that this structure reads in everything including things that might not
 ** be of any subsequent use.
** ****************************************************************************/ typedef struct { cdf_xda_header header; /* Header information */ char **probesetnames; /* Names of probesets */ int *qc_start; /* These are used for random access */ int *units_start; cdf_qc_unit *qc_units; cdf_unit *units; } cdf_xda; /************************************************************************* ** ** int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream) ** ** cdf_qc_unit *my_unit - preallocated space to store qc unit information ** int filelocation - indexing/location information used to read information ** from file ** FILE *instream - a pre-opened file to read from ** ** reads a specificed qc_unit from the file. Allocates space for the cdf_qc_probes ** and also reads them in ** ** *************************************************************************/ int read_cdf_qcunit(cdf_qc_unit *my_unit,int filelocation,FILE *instream){ int i; fseek(instream,filelocation,SEEK_SET); fread_uint16(&(my_unit->type),1,instream); fread_uint32(&(my_unit->n_probes),1,instream); my_unit->qc_probes = Calloc(my_unit->n_probes,cdf_qc_probe); for (i=0; i < my_unit->n_probes; i++){ fread_uint16(&(my_unit->qc_probes[i].x),1,instream); fread_uint16(&(my_unit->qc_probes[i].y),1,instream); fread_uchar(&(my_unit->qc_probes[i].probelength),1,instream); fread_uchar(&(my_unit->qc_probes[i].pmflag),1,instream); fread_uchar(&(my_unit->qc_probes[i].bgprobeflag),1,instream); } return 1; } /************************************************************************* ** ** int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream) ** ** cdf_qc_unit *my_unit - preallocated space to store unit (aka probeset) information ** int filelocation - indexing/location information used to read information ** from file ** FILE *instream - a pre-opened file to read from ** ** reads a specified probeset into the my_unit, including all blocks and all probes ** it is assumed that the unit itself 
is preallocated. Blocks and probes within ** the blocks are allocated by this function. ** *************************************************************************/ int read_cdf_unit(cdf_unit *my_unit,int filelocation,FILE *instream){ int i,j; fseek(instream,filelocation,SEEK_SET); fread_uint16(&(my_unit->unittype),1,instream); fread_uchar(&(my_unit->direction),1,instream); fread_int32(&(my_unit->natoms),1,instream); fread_int32(&(my_unit->nblocks),1,instream); fread_int32(&(my_unit->ncells),1,instream); fread_int32(&(my_unit->unitnumber),1,instream); fread_uchar(&(my_unit->ncellperatom),1,instream); my_unit->unit_block = Calloc(my_unit->nblocks,cdf_unit_block); for (i=0; i < my_unit->nblocks; i++){ fread_int32(&(my_unit->unit_block[i].natoms),1,instream); fread_int32(&(my_unit->unit_block[i].ncells),1,instream); fread_uchar(&(my_unit->unit_block[i].ncellperatom),1,instream); fread_uchar(&(my_unit->unit_block[i].direction),1,instream); fread_int32(&(my_unit->unit_block[i].firstatom),1,instream); fread_int32(&(my_unit->unit_block[i].unused),1,instream); fread_char(my_unit->unit_block[i].blockname,64,instream); my_unit->unit_block[i].unit_cells = Calloc(my_unit->unit_block[i].ncells,cdf_unit_cell); for (j=0; j < my_unit->unit_block[i].ncells; j++){ fread_int32(&(my_unit->unit_block[i].unit_cells[j].atomnumber),1,instream); fread_uint16(&(my_unit->unit_block[i].unit_cells[j].x),1,instream); fread_uint16(&(my_unit->unit_block[i].unit_cells[j].y),1,instream); fread_int32(&(my_unit->unit_block[i].unit_cells[j].indexpos),1,instream); fread_char(&(my_unit->unit_block[i].unit_cells[j].pbase),1,instream); fread_char(&(my_unit->unit_block[i].unit_cells[j].tbase),1,instream); } } return 1; } /************************************************************************* ** ** static void dealloc_cdf_xda(cdf_xda *my_cdf) ** ** Deallocates all the previously allocated memory. 
** *************************************************************************/ static void dealloc_cdf_xda(cdf_xda *my_cdf){ int i; for (i=0; i < my_cdf->header.n_units; i++){ Free(my_cdf->probesetnames[i]); } Free(my_cdf->probesetnames); Free(my_cdf->qc_start); Free(my_cdf->units_start); for (i=0; i < my_cdf->header.n_qc_units; i++){ Free(my_cdf->qc_units[i].qc_probes); } Free(my_cdf->qc_units); for (i=0; i < my_cdf->header.n_units; i++){ Free(my_cdf->units[i].unit_block); } Free(my_cdf->units); Free(my_cdf->header.ref_seq); } /************************************************************* ** ** int read_cdf_xda(const char *filename) ** ** filename - Name of the prospective binary cel file ** ** Returns 1 if the file was completely successfully parsed ** otherwise 0 (and possible prints a message to screen) ** ** ** ** *************************************************************/ static int read_cdf_xda(const char *filename,cdf_xda *my_cdf){ FILE *infile; int i; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } if (!fread_int32(&my_cdf->header.magicnumber,1,infile)){ return 0; } if (!fread_int32(&my_cdf->header.version_number,1,infile)){ return 0; } if (my_cdf->header.magicnumber != 67){ Rprintf("Magic number is not 67. 
This is probably not a binary cdf file.\n"); return 0; } if (my_cdf->header.version_number != 1){ Rprintf("Don't know if version %d binary cdf files can be handled.\n",my_cdf->header.version_number); return 0; } if (!fread_uint16(&my_cdf->header.cols,1,infile)){ return 0; } if (!fread_uint16(&my_cdf->header.rows,1,infile)){ return 0; } if (!fread_int32(&my_cdf->header.n_units,1,infile)){ return 0; } if (!fread_int32(&my_cdf->header.n_qc_units,1,infile)){ return 0; } if (!fread_int32(&my_cdf->header.len_ref_seq,1,infile)){ return 0; } my_cdf->header.ref_seq = Calloc(my_cdf->header.len_ref_seq,char); fread_char(my_cdf->header.ref_seq, my_cdf->header.len_ref_seq, infile); my_cdf->probesetnames = Calloc(my_cdf->header.n_units,char *); for (i =0; i < my_cdf->header.n_units;i++){ my_cdf->probesetnames[i] = Calloc(64,char); if (!fread_char(my_cdf->probesetnames[i], 64, infile)){ return 0; } } my_cdf->qc_start = Calloc(my_cdf->header.n_qc_units,int); my_cdf->units_start = Calloc(my_cdf->header.n_units,int); /*** Old code that might fail if there is 0 QCunits or 0 Units if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile) || !fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)){ return 0; } ***/ if (!fread_int32(my_cdf->qc_start,my_cdf->header.n_qc_units,infile)) { if(my_cdf->header.n_qc_units != 0) { return 0; } } if(!fread_int32(my_cdf->units_start,my_cdf->header.n_units,infile)) { if(my_cdf->header.n_units != 0) { return 0; } } /* We will read in all the QC and Standard Units, rather than random accessing what we need */ my_cdf->qc_units = Calloc(my_cdf->header.n_qc_units,cdf_qc_unit); for (i =0; i < my_cdf->header.n_qc_units; i++){ if (!read_cdf_qcunit(&my_cdf->qc_units[i],my_cdf->qc_start[i],infile)){ return 0; } } my_cdf->units = Calloc(my_cdf->header.n_units,cdf_unit); for (i=0; i < my_cdf->header.n_units; i++){ if (!read_cdf_unit(&my_cdf->units[i],my_cdf->units_start[i],infile)){ return 0; } } #ifdef READ_CDF_DEBUG Rprintf("%d %d %d %d 
%d\n",my_cdf->header.cols,my_cdf->header.rows,my_cdf->header.n_units,my_cdf->header.n_qc_units,my_cdf->header.len_ref_seq); for (i =0; i < my_cdf->header.n_units;i++){ Rprintf("%s\n",my_cdf->probesetnames[i]); } for (i =0; i < my_cdf->header.n_qc_units;i++){ Rprintf("%d\n",my_cdf->qc_start[i]); } for (i =0; i < my_cdf->header.n_qc_units;i++){ Rprintf("%d\n",my_cdf->units_start[i]); } Rprintf("%d %d\n",my_cdf->qc_units[0].type,my_cdf->qc_units[0].n_probes); for (i=0; i < my_cdf->qc_units[0].n_probes; i++){ Rprintf("%d %d %d %u %d\n",my_cdf->qc_units[0].qc_probes[i].x,my_cdf->qc_units[0].qc_probes[i].y, my_cdf->qc_units[0].qc_probes[i].probelength, my_cdf->qc_units[0].qc_probes[i].pmflag, my_cdf->qc_units[0].qc_probes[i].bgprobeflag); } Rprintf("%u %u %d %d %d %d %u\n",my_cdf->units[0].unittype,my_cdf->units[0].direction, my_cdf->units[0].natoms, my_cdf->units[0].nblocks, my_cdf->units[0].ncells, my_cdf->units[0].unitnumber, my_cdf->units[0].ncellperatom); Rprintf("%d %d %u %u %d %d %s\n",my_cdf->units[0].unit_block[0].natoms,my_cdf->units[0].unit_block[0].ncells, my_cdf->units[0].unit_block[0].ncellperatom, my_cdf->units[0].unit_block[0].direction, my_cdf->units[0].unit_block[0].firstatom, my_cdf->units[0].unit_block[0].unused, my_cdf->units[0].unit_block[0].blockname); for (i=0; i units[0].unit_block[0].ncells ; i++){ Rprintf("%d %u %u %d %c %c\n", my_cdf->units[0].unit_block[0].unit_cells[i].atomnumber, my_cdf->units[0].unit_block[0].unit_cells[i].x, my_cdf->units[0].unit_block[0].unit_cells[i].y, my_cdf->units[0].unit_block[0].unit_cells[i].indexpos, my_cdf->units[0].unit_block[0].unit_cells[i].pbase, my_cdf->units[0].unit_block[0].unit_cells[i].tbase); } #endif fclose(infile); return 1; /* fseek() */ } /************************************************************* ** ** static int check_cdf_xda(const char *filename) ** ** Opens the file give by filename and checks it to see if ** it looks like a binary CDF file. 
** returns 0 if the file looks like it is not a binary CDF
 ** aka xda format cdf file, 1 if it does.
 **
 ** Raises an R error if the file cannot be opened or is too short
 ** to contain the magic number and the version number. The file is
 ** always closed again before returning or raising.
 **
 *************************************************************/

static int check_cdf_xda(const char *filename){

  FILE *infile;
  int magicnumber,version_number;

  if ((infile = fopen(filename, "rb")) == NULL) {
    error("Unable to open the file %s",filename);
    return 0;
  }

  /* error() does not return, so the file has to be closed before calling it */
  if (!fread_int32(&magicnumber,1,infile)){
    fclose(infile);
    error("File corrupt or truncated?");
    return 0;
  }

  if (!fread_int32(&version_number,1,infile)){
    fclose(infile);
    error("File corrupt or truncated?");
    return 0;
  }

  /* both values are in hand; nothing further is read from the file */
  fclose(infile);

  if (magicnumber != 67){
    /* Magic number is not 67. This is probably not a binary cdf file. */
    return 0;
  }

  if (version_number != 1){
    /* Don't know if this version of binary cdf file can be handled. */
    return 0;
  }

  return 1;
}

/*************************************************************
 **
 ** static int isPM(char pbase,char tbase)
 **
 ** char pbase - probe base at substitution position
 ** char tbase - target base at substitution position
 **
 ** this function works out whether a probe is a PM or MM.
 ** The comparison is case-insensitive.
 **
 ** Returns 0 (MM) when the two bases are equal, or when pbase is
 ** one of A/T/C/G and tbase is not its complement (A-T, C-G).
 ** Returns 1 (PM) in every other case, which includes a pbase
 ** outside A/T/C/G that differs from tbase.
 **
 *************************************************************/

static int isPM(char pbase,char tbase){

  /* toupper() is only defined for values representable as unsigned char
     (or EOF), so go through unsigned char in case plain char is signed */
  pbase = (char)toupper((unsigned char)pbase);
  tbase = (char)toupper((unsigned char)tbase);

  if (pbase == tbase){
    return 0;
  } else if ((( pbase == 'A') && (tbase != 'T')) || (( pbase == 'T') && (tbase != 'A'))){
    return 0;
  } else if ((( pbase == 'C') && (tbase != 'G')) || (( pbase == 'G') && (tbase != 'C'))){
    return 0;
  }
  return 1;
}

/*************************************************************
 **
 ** SEXP CheckCDFXDA(SEXP filename)
 **
 ** Takes a given file name and returns 1 if it is a xda format CDF file
 **
** otherwise it returns 0
 **
 ** filename - R character vector; only its first element is used
 **
 ** The result is a length-one R integer vector holding the value
 ** returned by check_cdf_xda().
 **
 *************************************************************/

SEXP CheckCDFXDA(SEXP filename){
  SEXP tmp;
  int good;
  const char *cur_file_name;

  cur_file_name = CHAR(STRING_ELT(filename,0));

  good = check_cdf_xda(cur_file_name);

  PROTECT(tmp= allocVector(INTSXP,1));

  INTEGER(tmp)[0] = good;

  UNPROTECT(1);
  return tmp;
}

/*************************************************************
 **
 ** SEXP ReadCDFFile(SEXP filename)
 **
 ** filename - R character vector; only its first element is used
 **
 ** Reads a binary (xda) CDF file with read_cdf_xda() and returns a
 ** two element R list:
 **
 **   [0] numeric vector of length 2: header rows, header cols
 **   [1] list with one entry per probeset, named by block name (or, in
 **       the four-block genotyping case, possibly by probeset name with
 **       the one-character block name appended). Each entry is a numeric
 **       matrix with columns "pm" and "mm" holding 1-based cell indices
 **       computed as x + y*stride + 1, placed in the "pm" column when
 **       isPM() is true for the cell and in the "mm" column otherwise.
 **
 ** Raises an R error if the file cannot be parsed or if a unit is of a
 ** type other than 1 (expression) or 2 (genotyping).
 **
 ** NOTE(review): my_cdf.units[0] is read without checking that
 ** header.n_units > 0 - confirm that a CDF without units cannot reach here.
 ** NOTE(review): which result vectors get allocated depends only on the
 ** type of units[0]; a file mixing unit types would use LocMap/PSnames or
 ** tempLocMap/tempPSnames without them having been allocated - confirm
 ** that such files do not occur.
 ** NOTE(review): none of the error() calls inside the unit loop release
 ** my_cdf first.
 **
 *************************************************************/

SEXP ReadCDFFile(SEXP filename){

  SEXP CDFInfo;
  SEXP Dimensions;
  SEXP LocMap= R_NilValue,tempLocMap;
  SEXP CurLocs;
  SEXP PSnames = R_NilValue,tempPSnames;
  SEXP ColNames;
  SEXP dimnames;

  cdf_xda my_cdf;
  const char *cur_file_name;
  /* char *tmp_name; */
  /* NOTE(review): tmp_name is still used in the four-block genotyping branch
     below, so with the declaration commented out that branch can only compile
     when READ_CDF_NOSNP is defined - confirm against the build settings */

  int i,j,k;
  int cur_blocks,cur_cells, cur_atoms;
  /* int which_probetype; */
  int which_psname=0;   /* next free slot in tempPSnames/tempLocMap (genotyping only) */

  cdf_unit_cell *current_cell;

  double *curlocs;

  /* int nrows, ncols; */

  cur_file_name = CHAR(STRING_ELT(filename,0));

  if (!read_cdf_xda(cur_file_name,&my_cdf)){
    error("Problem reading binary cdf file %s. Possibly corrupted or truncated?\n",cur_file_name);
  }

  /* We output:
     nrows, ncols in an integer vector, plus a list of probesets PM MM locations (in the BioC style) */
  /* (Dimensions is in fact allocated as a REALSXP below) */
  PROTECT(CDFInfo = allocVector(VECSXP,2));
  PROTECT(Dimensions = allocVector(REALSXP,2));

  /* expression files get exactly one entry per unit; otherwise room for two
     entries per unit is reserved and trimmed to which_psname afterwards */
  if (my_cdf.units[0].unittype ==1){
    PROTECT(LocMap = allocVector(VECSXP,my_cdf.header.n_units));
    PROTECT(PSnames = allocVector(STRSXP,my_cdf.header.n_units));
  } else {
    PROTECT(tempLocMap = allocVector(VECSXP,2*my_cdf.header.n_units));
    PROTECT(tempPSnames = allocVector(STRSXP,2*my_cdf.header.n_units));
  }

  NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.rows;
  NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.cols;

  for (i=0; i < my_cdf.header.n_units; i++){
#ifdef READ_CDF_DEBUG
    printf("%d\n",i);
#endif
    cur_blocks = my_cdf.units[i].nblocks;

#ifdef READ_CDF_DEBUG
    Rprintf("New Block: ");
#endif
    if (my_cdf.units[i].unittype ==1){
      /* Expression analysis */
      /* every block writes to slot i, so with more than one block the last one wins */
      for (j=0; j < cur_blocks; j++){

#ifdef READ_CDF_DEBUG
        Rprintf("%s ",my_cdf.units[i].unit_block[j].blockname);
#endif

        cur_cells = my_cdf.units[i].unit_block[j].ncells;
        cur_atoms = my_cdf.units[i].unit_block[j].natoms;

        SET_STRING_ELT(PSnames,i,mkChar(my_cdf.units[i].unit_block[j].blockname));

        PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));

        curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));

        /* cells that are never assigned below stay NaN */
        for (k=0; k < cur_atoms*2; k++){
          curlocs[k] = R_NaN;
        }

        /* column-major matrix: first cur_atoms entries are "pm", the next cur_atoms are "mm" */
        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[j].unit_cells[k]);

          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.cols) + 1; /* current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; */ /* "y*", sizex, "+x+1"; */
          } else {
            curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.cols) + 1; /* current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; */
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        SET_VECTOR_ELT(LocMap,i,CurLocs);
        UNPROTECT(3);
      }
    } else if (my_cdf.units[i].unittype == 2){
      /* Genotyping array */

#ifndef READ_CDF_NOSNP
      if (cur_blocks == 1){

        cur_cells = my_cdf.units[i].unit_block[0].ncells;
        cur_atoms = my_cdf.units[i].unit_block[0].natoms;

        SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));

        PROTECT(CurLocs = allocMatrix(REALSXP,cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));

        /* NOTE(review): unlike the expression branch, the matrix is not
           pre-filled with R_NaN here, so unassigned entries are uninitialised */
        curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);

          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.cols) + 1; /* current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; */ /* "y*", sizex, "+x+1"; */
          } else {
            curlocs[current_cell->atomnumber+ cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.cols) + 1; /* current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; */
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
        UNPROTECT(3);
        which_psname++;

      } else if (cur_blocks == 4){
        /* Four blocks produce two output entries: blocks 0 and 2 are
           combined into the first, blocks 1 and 3 into the second.
           NOTE(review): this branch uses header.rows as the stride whereas
           the branches above use header.cols - confirm which is intended. */
        for (j=0; j < cur_blocks; j++){
#ifdef READ_CDF_DEBUG_SNP
          Rprintf("%s %s\n",my_cdf.probesetnames[i],my_cdf.units[i].unit_block[j].blockname);
#endif
        }

        j = 0;
        cur_cells = my_cdf.units[i].unit_block[0].ncells;
        cur_atoms = my_cdf.units[i].unit_block[0].natoms;
        /* a one-character block name is appended to the probeset name */
        if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
          tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
          tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
          tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
          Free(tmp_name);
        } else {
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[0].blockname));
        }

        PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));

        curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[0].unit_cells[k]);
          /* Rprintf("%d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
          } else {
            curlocs[current_cell->atomnumber+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
          }
          /* NOTE(review): hard-coded index 370737 looks like a leftover debugging aid */
          if (current_cell->x + current_cell->y*(my_cdf.header.rows) + 1 == 370737){
            Rprintf("%d %c %c",isPM(current_cell->pbase,current_cell->tbase),current_cell->pbase,current_cell->tbase);
          }
        }

        j=2;
        cur_cells = my_cdf.units[i].unit_block[2].ncells;
        cur_atoms = my_cdf.units[i].unit_block[2].natoms;

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[2].unit_cells[k]);
          /* Rprintf("half : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
          } else {
            curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
        UNPROTECT(3);
        which_psname++;

        j = 1;
        cur_cells = my_cdf.units[i].unit_block[1].ncells;
        cur_atoms = my_cdf.units[i].unit_block[1].natoms;
        if (strlen(my_cdf.units[i].unit_block[j].blockname) == 1){
          tmp_name = Calloc(strlen(my_cdf.probesetnames[i])+2,char);
          tmp_name = strcpy(tmp_name,my_cdf.probesetnames[i]);
          tmp_name = strcat(tmp_name,my_cdf.units[i].unit_block[j].blockname);
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(tmp_name));
          Free(tmp_name);
        } else {
          SET_STRING_ELT(tempPSnames,which_psname,mkChar(my_cdf.units[i].unit_block[1].blockname));
        }

        PROTECT(CurLocs = allocMatrix(REALSXP,2*cur_atoms,2));
        PROTECT(ColNames = allocVector(STRSXP,2));
        PROTECT(dimnames = allocVector(VECSXP,2));
        SET_STRING_ELT(ColNames,0,mkChar("pm"));
        SET_STRING_ELT(ColNames,1,mkChar("mm"));

        curlocs = NUMERIC_POINTER(AS_NUMERIC(CurLocs));

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[1].unit_cells[k]);
          /* Rprintf("Dual : %d %d %u %u \n",cur_cells, current_cell->atomnumber,current_cell->x,current_cell->y); */
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber - (cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
          } else {
            curlocs[current_cell->atomnumber - (cur_atoms)+ 2*cur_atoms] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
          }
        }

        j=3;
        cur_cells = my_cdf.units[i].unit_block[3].ncells;
        cur_atoms = my_cdf.units[i].unit_block[3].natoms;

        for (k=0; k < cur_cells; k++){
          current_cell = &(my_cdf.units[i].unit_block[3].unit_cells[k]);
          /* Rprintf("half deux : %d %d %d %u %u \n",cur_cells, current_cell->atomnumber, cur_atoms,current_cell->x,current_cell->y); */
          if(isPM(current_cell->pbase,current_cell->tbase)){
            curlocs[current_cell->atomnumber - (2*cur_atoms)] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1; /* "y*", sizex, "+x+1"; */
          } else {
            curlocs[current_cell->atomnumber] = current_cell->x + current_cell->y*(my_cdf.header.rows) + 1;
          }
        }

        SET_VECTOR_ELT(dimnames,1,ColNames);
        setAttrib(CurLocs, R_DimNamesSymbol, dimnames);
        SET_VECTOR_ELT(tempLocMap,which_psname,CurLocs);
        UNPROTECT(3);
        which_psname++;

      } else {
        error("makecdfenv does not currently know how to handle cdf files of this type (genotyping with blocks != 1 or 4.)");
      }
#else
      error("makecdfenv does not currently know how to handle cdf files of this type (genotyping).");
#endif

    } else {
      error("makecdfenv does not currently know how to handle cdf files of this type (ie not expression or genotyping)");
    }

#ifdef READ_CDF_DEBUG
    Rprintf("\n");
#endif
  }

  /* genotyping: copy the which_psname entries actually produced into
     result vectors of exactly that length */
  if (my_cdf.units[0].unittype ==2){
    PROTECT(PSnames = allocVector(STRSXP,which_psname));
    PROTECT(LocMap = allocVector(VECSXP,which_psname));
    for (i =0; i < which_psname; i++){
      SET_STRING_ELT(PSnames,i,mkChar(CHAR(STRING_ELT(tempPSnames,i))));
      SET_VECTOR_ELT(LocMap,i,VECTOR_ELT(tempLocMap,i));
    }
  }

#ifdef READ_CDF_DEBUG
  Rprintf("%d \n",which_psname);
#endif

  setAttrib(LocMap,R_NamesSymbol,PSnames);
  SET_VECTOR_ELT(CDFInfo,0,Dimensions);
  SET_VECTOR_ELT(CDFInfo,1,LocMap);

  /* 4 objects are protected on the expression path; the genotyping path
     protects the two trimmed result vectors in addition */
  if (my_cdf.units[0].unittype ==2){
    UNPROTECT(6);
  } else {
    UNPROTECT(4);
  }

  dealloc_cdf_xda(&my_cdf);
  return CDFInfo;
}

/* This function is for reading in the entire binary cdf file and then
 * returning
the structure in a complex list object. * The fullstructure argument is expected to be a BOOLEAN. If TRUE the * entire contents of the CDF file are returned. * If False, a modified CDFENV style structure is returned */ SEXP ReadCDFFileIntoRList(SEXP filename,SEXP fullstructure){ SEXP CDFInfo = R_NilValue; /* this is the object that will be returned */ SEXP CDFInfoNames; SEXP HEADER; /* Will store the header information */ SEXP HEADERNames; SEXP Dimensions; SEXP DimensionsNames; SEXP REFSEQ; /* Resequencing reference sequence */ SEXP UNITNAMES; SEXP FILEPOSITIONS; SEXP FILEPOSITIONSQC; SEXP FILEPOSITIONSUNITS; SEXP FILEPOSITIONSNames; SEXP QCUNITS; SEXP QCUNITSsub; SEXP QCUNITSsubNames; SEXP QCHEADER; SEXP QCHEADERNames; SEXP QCUNITSProbeInfo; SEXP QCUNITSProbeInfoX; SEXP QCUNITSProbeInfoY; SEXP QCUNITSProbeInfoPL; SEXP QCUNITSProbeInfoPMFLAG; SEXP QCUNITSProbeInfoBGFLAG; SEXP QCUNITSProbeInfoNames; SEXP QCUNITSProbeInforow_names; SEXP UNITS; SEXP tmpUNIT; SEXP tmpUNITNames; SEXP UNITSHeader; SEXP UNITSHeaderNames; SEXP tmpUNITSBlock; SEXP UNITSBlock; SEXP UNITSBlockNames; SEXP UNITSBlockHeader; SEXP UNITSBlockHeaderNames; SEXP UNITSBlockInfo; SEXP UNITSBlockInfoNames; SEXP UNITSBlockInforow_names; SEXP UNITSBlockAtom ; SEXP UNITSBlockX; SEXP UNITSBlockY; SEXP UNITSBlockIndexPos; SEXP UNITSBlockPbase; SEXP UNITSBlockTbase; char buf[11]; int i,j,k; cdf_xda my_cdf; const char *cur_file_name; cur_file_name = CHAR(STRING_ELT(filename,0)); /* Read in the xda style CDF file into memory */ if (!read_cdf_xda(cur_file_name,&my_cdf)){ error("Problem reading binary cdf file %s. 
Possibly corrupted or truncated?\n",cur_file_name); } if (asInteger(fullstructure)){ /* return the full structure */ PROTECT(CDFInfo = allocVector(VECSXP,5)); PROTECT(CDFInfoNames = allocVector(STRSXP,5)); SET_STRING_ELT(CDFInfoNames,0,mkChar("Header")); SET_STRING_ELT(CDFInfoNames,1,mkChar("UnitNames")); SET_STRING_ELT(CDFInfoNames,2,mkChar("FilePositions")); SET_STRING_ELT(CDFInfoNames,3,mkChar("QCUnits")); SET_STRING_ELT(CDFInfoNames,4,mkChar("Units")); setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames); UNPROTECT(1); PROTECT(HEADER = allocVector(VECSXP,2)); PROTECT(HEADERNames = allocVector(STRSXP,2)); SET_STRING_ELT(HEADERNames,0,mkChar("Dimensions")); SET_STRING_ELT(HEADERNames,1,mkChar("ReseqRefSeq")); setAttrib(HEADER,R_NamesSymbol,HEADERNames); UNPROTECT(1); PROTECT(Dimensions = allocVector(REALSXP,7)); NUMERIC_POINTER(Dimensions)[0] = (double)my_cdf.header.magicnumber; NUMERIC_POINTER(Dimensions)[1] = (double)my_cdf.header.version_number; NUMERIC_POINTER(Dimensions)[2] = (double)my_cdf.header.cols; NUMERIC_POINTER(Dimensions)[3] = (double)my_cdf.header.rows; NUMERIC_POINTER(Dimensions)[4] = (double)my_cdf.header.n_qc_units; NUMERIC_POINTER(Dimensions)[5] = (double)my_cdf.header.n_units; NUMERIC_POINTER(Dimensions)[6] = (double)my_cdf.header.len_ref_seq; PROTECT(DimensionsNames = allocVector(STRSXP,7)); SET_STRING_ELT(DimensionsNames,0,mkChar("MagicNumber")); SET_STRING_ELT(DimensionsNames,1,mkChar("VersionNumber")); SET_STRING_ELT(DimensionsNames,2,mkChar("Cols")); SET_STRING_ELT(DimensionsNames,3,mkChar("Rows")); SET_STRING_ELT(DimensionsNames,4,mkChar("n.QCunits")); SET_STRING_ELT(DimensionsNames,5,mkChar("n.units")); SET_STRING_ELT(DimensionsNames,6,mkChar("LenRefSeq")); setAttrib(Dimensions,R_NamesSymbol,DimensionsNames); SET_VECTOR_ELT(HEADER,0,Dimensions); UNPROTECT(2); PROTECT(REFSEQ = allocVector(STRSXP,1)); SET_STRING_ELT(REFSEQ,0,mkChar(my_cdf.header.ref_seq)); SET_VECTOR_ELT(HEADER,1,REFSEQ); UNPROTECT(1); SET_VECTOR_ELT(CDFInfo,0,HEADER); 
UNPROTECT(1); PROTECT(UNITNAMES = allocVector(STRSXP,my_cdf.header.n_units)); for (i =0; i < my_cdf.header.n_units; i++){ SET_STRING_ELT(UNITNAMES,i,mkChar(my_cdf.probesetnames[i])); } SET_VECTOR_ELT(CDFInfo,1,UNITNAMES); UNPROTECT(1); PROTECT(FILEPOSITIONS = allocVector(VECSXP,2)); PROTECT(FILEPOSITIONSQC = allocVector(REALSXP,my_cdf.header.n_qc_units)); PROTECT(FILEPOSITIONSUNITS = allocVector(REALSXP,my_cdf.header.n_units)); for (i =0; i < my_cdf.header.n_qc_units; i++){ NUMERIC_POINTER(FILEPOSITIONSQC)[i] = (double)my_cdf.qc_start[i]; } for (i =0; i < my_cdf.header.n_units; i++){ NUMERIC_POINTER(FILEPOSITIONSUNITS)[i] = (double)my_cdf.units_start[i]; } SET_VECTOR_ELT(FILEPOSITIONS,0,FILEPOSITIONSQC); SET_VECTOR_ELT(FILEPOSITIONS,1,FILEPOSITIONSUNITS); PROTECT(FILEPOSITIONSNames = allocVector(STRSXP,2)); SET_STRING_ELT(FILEPOSITIONSNames,0,mkChar("FilePosQC")); SET_STRING_ELT(FILEPOSITIONSNames,1,mkChar("FilePosUnits")); setAttrib(FILEPOSITIONS,R_NamesSymbol,FILEPOSITIONSNames); SET_VECTOR_ELT(CDFInfo,2,FILEPOSITIONS); UNPROTECT(4); PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.n_qc_units)); for (i =0; i < my_cdf.header.n_qc_units; i++){ PROTECT(QCUNITSsub = allocVector(VECSXP,2)); PROTECT(QCUNITSsubNames= allocVector(STRSXP,2)); SET_STRING_ELT(QCUNITSsubNames,0,mkChar("QCUnitHeader")); SET_STRING_ELT(QCUNITSsubNames,1,mkChar("QCUnitInfo")); setAttrib(QCUNITSsub,R_NamesSymbol,QCUNITSsubNames); PROTECT(QCHEADER = allocVector(REALSXP,2)); NUMERIC_POINTER(QCHEADER)[0] = (double)my_cdf.qc_units[i].type; NUMERIC_POINTER(QCHEADER)[1] = (double)my_cdf.qc_units[i].n_probes; PROTECT(QCHEADERNames = allocVector(STRSXP,2)); SET_STRING_ELT(QCHEADERNames,0,mkChar("Type")); SET_STRING_ELT(QCHEADERNames,1,mkChar("n.probes")); setAttrib(QCHEADER,R_NamesSymbol,QCHEADERNames); SET_VECTOR_ELT(QCUNITSsub,0,QCHEADER); PROTECT(QCUNITSProbeInfo = allocVector(VECSXP,5)); PROTECT(QCUNITSProbeInfoX = allocVector(REALSXP,my_cdf.qc_units[i].n_probes)); PROTECT(QCUNITSProbeInfoY = 
allocVector(REALSXP,my_cdf.qc_units[i].n_probes)); PROTECT(QCUNITSProbeInfoPL = allocVector(REALSXP,my_cdf.qc_units[i].n_probes)); PROTECT(QCUNITSProbeInfoPMFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes)); PROTECT(QCUNITSProbeInfoBGFLAG = allocVector(REALSXP,my_cdf.qc_units[i].n_probes)); for (j=0; j < my_cdf.qc_units[i].n_probes; j++){ NUMERIC_POINTER(QCUNITSProbeInfoX)[j] = (double)my_cdf.qc_units[i].qc_probes[j].x; NUMERIC_POINTER(QCUNITSProbeInfoY)[j] = (double)my_cdf.qc_units[i].qc_probes[j].y; NUMERIC_POINTER(QCUNITSProbeInfoPL)[j] = (double)my_cdf.qc_units[i].qc_probes[j].probelength; NUMERIC_POINTER(QCUNITSProbeInfoPMFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].pmflag; NUMERIC_POINTER(QCUNITSProbeInfoBGFLAG)[j] = (double)my_cdf.qc_units[i].qc_probes[j].bgprobeflag; } SET_VECTOR_ELT(QCUNITSProbeInfo,0,QCUNITSProbeInfoX); SET_VECTOR_ELT(QCUNITSProbeInfo,1,QCUNITSProbeInfoY); SET_VECTOR_ELT(QCUNITSProbeInfo,2,QCUNITSProbeInfoPL); SET_VECTOR_ELT(QCUNITSProbeInfo,3,QCUNITSProbeInfoPMFLAG); SET_VECTOR_ELT(QCUNITSProbeInfo,4,QCUNITSProbeInfoBGFLAG); PROTECT(QCUNITSProbeInfoNames = allocVector(STRSXP,5)); SET_STRING_ELT(QCUNITSProbeInfoNames,0,mkChar("x")); SET_STRING_ELT(QCUNITSProbeInfoNames,1,mkChar("y")); SET_STRING_ELT(QCUNITSProbeInfoNames,2,mkChar("ProbeLength")); SET_STRING_ELT(QCUNITSProbeInfoNames,3,mkChar("PMFlag")); SET_STRING_ELT(QCUNITSProbeInfoNames,4,mkChar("BGProbeFlag")); setAttrib(QCUNITSProbeInfo,R_NamesSymbol,QCUNITSProbeInfoNames); PROTECT(QCUNITSProbeInforow_names= allocVector(STRSXP,my_cdf.qc_units[i].n_probes)); for (j=0; j < my_cdf.qc_units[i].n_probes; j++){ sprintf(buf, "%d", j+1); SET_STRING_ELT(QCUNITSProbeInforow_names,j,mkChar(buf)); } setAttrib(QCUNITSProbeInfo, R_RowNamesSymbol, QCUNITSProbeInforow_names); setAttrib(QCUNITSProbeInfo,R_ClassSymbol,mkString("data.frame")); SET_VECTOR_ELT(QCUNITSsub,1,QCUNITSProbeInfo); SET_VECTOR_ELT(QCUNITS,i,QCUNITSsub); UNPROTECT(12); } SET_VECTOR_ELT(CDFInfo,3,QCUNITS); 
UNPROTECT(1); PROTECT(UNITS = allocVector(VECSXP,my_cdf.header.n_units)); for (i =0; i < my_cdf.header.n_units; i++){ PROTECT(tmpUNIT = allocVector(VECSXP,2)); PROTECT(tmpUNITNames = allocVector(STRSXP,2)); SET_STRING_ELT(tmpUNITNames,0,mkChar("UnitHeader")); SET_STRING_ELT(tmpUNITNames,1,mkChar("Block")); setAttrib(tmpUNIT,R_NamesSymbol,tmpUNITNames); PROTECT(UNITSHeader = allocVector(REALSXP,7)); PROTECT(UNITSHeaderNames = allocVector(STRSXP,7)); SET_STRING_ELT(UNITSHeaderNames,0,mkChar("UnitType")); SET_STRING_ELT(UNITSHeaderNames,1,mkChar("Direction")); SET_STRING_ELT(UNITSHeaderNames,2,mkChar("n.atoms")); SET_STRING_ELT(UNITSHeaderNames,3,mkChar("n.blocks")); SET_STRING_ELT(UNITSHeaderNames,4,mkChar("n.cells")); SET_STRING_ELT(UNITSHeaderNames,5,mkChar("UnitNumber")); SET_STRING_ELT(UNITSHeaderNames,6,mkChar("n.cellsperatom")); setAttrib(UNITSHeader,R_NamesSymbol,UNITSHeaderNames); NUMERIC_POINTER(UNITSHeader)[0] = (double)my_cdf.units[i].unittype; NUMERIC_POINTER(UNITSHeader)[1] = (double)my_cdf.units[i].direction; NUMERIC_POINTER(UNITSHeader)[2] = (double)my_cdf.units[i].natoms; NUMERIC_POINTER(UNITSHeader)[3] = (double)my_cdf.units[i].nblocks; NUMERIC_POINTER(UNITSHeader)[4] = (double)my_cdf.units[i].ncells; NUMERIC_POINTER(UNITSHeader)[5] = (double)my_cdf.units[i].unitnumber; NUMERIC_POINTER(UNITSHeader)[6] = (double)my_cdf.units[i].ncellperatom; PROTECT(tmpUNITSBlock = allocVector(VECSXP,my_cdf.units[i].nblocks)); for (j=0; j < my_cdf.units[i].nblocks; j++){ PROTECT(UNITSBlock = allocVector(VECSXP,3)); PROTECT(UNITSBlockNames = allocVector(STRSXP,3)); SET_STRING_ELT(UNITSBlockNames,0,mkChar("Header")); SET_STRING_ELT(UNITSBlockNames,1,mkChar("Name")); SET_STRING_ELT(UNITSBlockNames,2,mkChar("UnitInfo")); setAttrib(UNITSBlock,R_NamesSymbol,UNITSBlockNames); PROTECT(UNITSBlockHeader = allocVector(REALSXP,6)); PROTECT(UNITSBlockHeaderNames= allocVector(STRSXP,6)); SET_STRING_ELT(UNITSBlockHeaderNames,0,mkChar("n.atoms")); 
SET_STRING_ELT(UNITSBlockHeaderNames,1,mkChar("n.cells")); SET_STRING_ELT(UNITSBlockHeaderNames,2,mkChar("n.cellsperatom")); SET_STRING_ELT(UNITSBlockHeaderNames,3,mkChar("Direction")); SET_STRING_ELT(UNITSBlockHeaderNames,4,mkChar("firstatom")); SET_STRING_ELT(UNITSBlockHeaderNames,5,mkChar("unused")); NUMERIC_POINTER(UNITSBlockHeader)[0] = (double)my_cdf.units[i].unit_block[j].natoms; NUMERIC_POINTER(UNITSBlockHeader)[1] = (double)my_cdf.units[i].unit_block[j].ncells; NUMERIC_POINTER(UNITSBlockHeader)[2] = (double)my_cdf.units[i].unit_block[j].ncellperatom; NUMERIC_POINTER(UNITSBlockHeader)[3] = (double)my_cdf.units[i].unit_block[j].direction; NUMERIC_POINTER(UNITSBlockHeader)[4] = (double)my_cdf.units[i].unit_block[j].firstatom; NUMERIC_POINTER(UNITSBlockHeader)[5] = (double)my_cdf.units[i].unit_block[j].unused; setAttrib(UNITSBlockHeader,R_NamesSymbol,UNITSBlockHeaderNames); SET_VECTOR_ELT(UNITSBlock,0,UNITSBlockHeader); SET_VECTOR_ELT(UNITSBlock,1,mkString(my_cdf.units[i].unit_block[j].blockname)); PROTECT(UNITSBlockInfo = allocVector(VECSXP,6)); PROTECT(UNITSBlockInfoNames = allocVector(STRSXP,6)); SET_STRING_ELT(UNITSBlockInfoNames,0,mkChar("atom.number")); SET_STRING_ELT(UNITSBlockInfoNames,1,mkChar("x")); SET_STRING_ELT(UNITSBlockInfoNames,2,mkChar("y")); SET_STRING_ELT(UNITSBlockInfoNames,3,mkChar("index.position")); SET_STRING_ELT(UNITSBlockInfoNames,4,mkChar("pbase")); SET_STRING_ELT(UNITSBlockInfoNames,5,mkChar("tbase")); setAttrib(UNITSBlockInfo,R_NamesSymbol,UNITSBlockInfoNames); PROTECT(UNITSBlockInforow_names = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells)); for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){ sprintf(buf, "%d", k+1); SET_STRING_ELT(UNITSBlockInforow_names,k,mkChar(buf)); } PROTECT(UNITSBlockAtom = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells)); PROTECT(UNITSBlockX = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells)); PROTECT(UNITSBlockY = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells)); 
PROTECT(UNITSBlockIndexPos = allocVector(INTSXP,my_cdf.units[i].unit_block[j].ncells)); PROTECT(UNITSBlockPbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells)); PROTECT(UNITSBlockTbase = allocVector(STRSXP,my_cdf.units[i].unit_block[j].ncells)); for (k=0; k < my_cdf.units[i].unit_block[j].ncells; k++){ /* Rprintf("%d %d %d\n",i,j,k); // NUMERIC_POINTER(UNITSBlockAtom)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber; // NUMERIC_POINTER(UNITSBlockX)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].x; // NUMERIC_POINTER(UNITSBlockY)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].y; // NUMERIC_POINTER(UNITSBlockIndexPos)[k] = (double)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos; */ INTEGER_POINTER(UNITSBlockAtom)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].atomnumber; INTEGER_POINTER(UNITSBlockX)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].x; INTEGER_POINTER(UNITSBlockY)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].y; INTEGER_POINTER(UNITSBlockIndexPos)[k] = (int)my_cdf.units[i].unit_block[j].unit_cells[k].indexpos; sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].pbase); SET_STRING_ELT(UNITSBlockPbase,k,mkChar(buf)); sprintf(buf, "%c",my_cdf.units[i].unit_block[j].unit_cells[k].tbase); SET_STRING_ELT(UNITSBlockTbase,k,mkChar(buf)); } SET_VECTOR_ELT(UNITSBlockInfo,0,UNITSBlockAtom); SET_VECTOR_ELT(UNITSBlockInfo,1,UNITSBlockX); SET_VECTOR_ELT(UNITSBlockInfo,2,UNITSBlockY); SET_VECTOR_ELT(UNITSBlockInfo,3,UNITSBlockIndexPos); SET_VECTOR_ELT(UNITSBlockInfo,4,UNITSBlockPbase); SET_VECTOR_ELT(UNITSBlockInfo,5,UNITSBlockTbase); UNPROTECT(6); setAttrib(UNITSBlockInfo, R_RowNamesSymbol, UNITSBlockInforow_names); setAttrib(UNITSBlockInfo,R_ClassSymbol,mkString("data.frame")); SET_VECTOR_ELT(UNITSBlock,2,UNITSBlockInfo); SET_VECTOR_ELT(tmpUNITSBlock,j,UNITSBlock); UNPROTECT(7); } SET_VECTOR_ELT(tmpUNIT,0,UNITSHeader); SET_VECTOR_ELT(tmpUNIT,1,tmpUNITSBlock); 
SET_VECTOR_ELT(UNITS,i,tmpUNIT); UNPROTECT(5); } SET_VECTOR_ELT(CDFInfo,4,UNITS); UNPROTECT(1); } else { /* return the abbreviated structure */ error("Abbreviated structure not yet implemented.\n"); } dealloc_cdf_xda(&my_cdf); UNPROTECT(1); return CDFInfo; } affyio/src/read_cdffile2.c0000644000175400017540000014410313556116171016472 0ustar00biocbuildbiocbuild/**************************************************************** ** ** File: read_cdffile2.c ** ** Implementation by: B. M. Bolstad ** ** Copyright (C) B. M. Bolstad 2005 ** ** A parser designed to read text CDF files into an R List structure ** ** Note this version only parses GC3.0 version text files (which should ** be almost all text CDF files currently used) ** ** Note that the original text CDF parser (from which this file is not in ** anyway based) was written by Laurent Gautier. That file was named ** read_cdffile.c (originally part of affy and then later makecdfenv) ** ** Implemented based on documentation available from Affymetrix ** ** Implementation begun 2005. ** ** Modification Dates ** Jul 24 - Initial version ** Sep 20 - Continued Implementation ** Sep 21 - Continued Implementation and debugging ** Sep 22 - Continued Implementation and testing ** Sep 24 - QCunit probes, Unit Block probes, Finish and tested. ** Dec 1, 2005 - Some comment cleaning. 
Added isTextCDFFile,CheckCDFtext ** Feb 28, 2006 - replace C++ comments with ANSI comments for older compilers ** May 31, 2006 - fix some compiler warnings ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues ** ** *******************************************************************/ #include #include #include "stdlib.h" #include "stdio.h" #define BUFFER_SIZE 1024 /***************************************************************** ** ** ** A structure for holding information in the ** "CDF" and "Chip" sections (basically header information) ** ******************************************************************/ typedef struct { char *version; char *name; int rows,cols; int numberofunits; int maxunit; int NumQCUnits; char *chipreference; } cdf_text_header; /***************************************************************** ** ** ** A structure for holding QC probe information ** Note the "CYCLES" item is ignored and never parsed ** ******************************************************************/ typedef struct { int x; int y; char *probe; int plen; int atom; int index; int match; int bg; } cdf_text_qc_probe; /******************************************************************* ** ** A structure for holding QC units information. These are ** areas of the chip that contain probes that may or may not be useful ** for QC and other purposes. ** ** *******************************************************************/ typedef struct{ int type; unsigned int n_probes; int qccontains[8]; /* either 0 or 1 for each of the eight possible fields. 
a 1 means that field is present.*/ cdf_text_qc_probe *qc_probes; } cdf_text_qc_unit; /******************************************************************* ** ** A structure for holding probe information for unit_blocks_probes ** ** probes are stored within blocks ** *******************************************************************/ typedef struct{ int x; int y; char *probe; char *feat; char *qual; int expos; int pos; char *cbase; char *pbase; char *tbase; int atom; int index; int codonid; int codon; int regiontype; char* region; } cdf_text_unit_block_probe; /******************************************************************* ** ** A structure holding Unit_blocks ** ** blocks are stored within units. ** blocks contain many probes ** *******************************************************************/ typedef struct{ char *name; int blocknumber; int num_atoms; int num_cells; int start_position; int stop_position; int direction; cdf_text_unit_block_probe *probes; } cdf_text_unit_block; /******************************************************************* ** ** A structure for holding "Units" AKA known as probesets ** ** Each unit contains one or more blocks. 
Each block contains one or ** more probes ** *******************************************************************/ typedef struct{ char *name; int direction; int num_atoms; int num_cells; int unit_number; int unit_type; int numberblocks; int MutationType; cdf_text_unit_block *blocks; } cdf_text_unit; /******************************************************************* ** ** A structure for holding a text CDF file ** ** text cdf files consist of ** basic header information ** qcunits ** - qc probes ** units (aka probesets) ** - blocks ** - probes ** ** *******************************************************************/ typedef struct{ cdf_text_header header; cdf_text_qc_unit *qc_units; cdf_text_unit *units; } cdf_text; /************************************************************** ** ** The following code is for tokenizing strings ** originally included in read_abatch.c from the affy package. ** *************************************************************/ /*************************************************************** ** ** tokenset ** ** char **tokens - a array of token strings ** int n - number of tokens in this set. ** ** a structure to hold a set of tokens. Typically a tokenset is ** created by breaking a character string based upon a set of ** delimiters. 
** ** **************************************************************/ typedef struct{ char **tokens; int n; } tokenset; /****************************************************************** ** ** tokenset *tokenize(char *str, char *delimiters) ** ** char *str - a string to break into tokens ** char *delimiters - delimiters to use in breaking up the line ** ** ** RETURNS a new tokenset ** ** Given a string, split into tokens based on a set of delimitors ** *****************************************************************/ static tokenset *tokenize(char *str, char *delimiters){ int i=0; char *current_token; tokenset *my_tokenset = Calloc(1,tokenset); my_tokenset->n=0; my_tokenset->tokens = NULL; current_token = strtok(str,delimiters); while (current_token != NULL){ my_tokenset->n++; my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*); my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char); strcpy(my_tokenset->tokens[i],current_token); i++; current_token = strtok(NULL,delimiters); } return my_tokenset; } /****************************************************************** ** ** int tokenset_size(tokenset *x) ** ** tokenset *x - a tokenset ** ** RETURNS the number of tokens in the tokenset ** ******************************************************************/ static int tokenset_size(tokenset *x){ return x->n; } /****************************************************************** ** ** char *get_token(tokenset *x, int i) ** ** tokenset *x - a tokenset ** int i - index of the token to return ** ** RETURNS pointer to the i'th token ** ******************************************************************/ static char *get_token(tokenset *x,int i){ return x->tokens[i]; } /****************************************************************** ** ** void delete_tokens(tokenset *x) ** ** tokenset *x - a tokenset ** ** Deallocates all the space allocated for a tokenset ** ******************************************************************/ static void 
delete_tokens(tokenset *x){ int i; for (i=0; i < x->n; i++){ Free(x->tokens[i]); } Free(x->tokens); Free(x); } /******************************************************************* ** ** int token_ends_with(char *token, char *ends) ** ** char *token - a string to check ** char *ends_in - we are looking for this string at the end of token ** ** ** returns 0 if no match, otherwise it returns the index of the first character ** which matchs the start of *ends. ** ** Note that there must be one additional character in "token" beyond ** the characters in "ends". So ** ** *token = "TestStr" ** *ends = "TestStr" ** ** would return 0 but if ** ** ends = "estStr" ** ** we would return 1. ** ** and if ** ** ends= "stStr" ** we would return 2 .....etc ** ** ******************************************************************/ /* static int token_ends_with(char *token, char *ends_in){ int tokenlength = strlen(token); int ends_length = strlen(ends_in); int start_pos; char *tmp_ptr; if (tokenlength <= ends_length){ // token string is too short so can't possibly end with ends return 0; } start_pos = tokenlength - ends_length; tmp_ptr = &token[start_pos]; if (strcmp(tmp_ptr,ends_in)==0){ return start_pos; } else { return 0; } } */ /****************************************************************** ** ** The following code, also from read_abatch.c is more about locating ** sections in the file and reading it in. ** ******************************************************************/ /** ** This reads a line from the specified file stream ** ** **/ static void ReadFileLine(char *buffer, int buffersize, FILE *currentFile){ if (fgets(buffer, buffersize, currentFile) == NULL){ error("End of file reached unexpectedly. 
Perhaps this file is truncated.\n"); } } /****************************************************************** ** ** void findStartsWith(FILE *my_file,char *starts, char *buffer) ** ** FILE *my_file - an open file to read from ** char *starts - the string to search for at the start of each line ** char *buffer - where to place the line that has been read. ** ** ** Find a line that starts with the specified character string. ** At exit buffer should contain that line ** *****************************************************************/ static void findStartsWith(FILE *my_file,char *starts, char *buffer){ int starts_len = strlen(starts); int match = 1; do { ReadFileLine(buffer, BUFFER_SIZE, my_file); match = strncmp(starts, buffer, starts_len); } while (match != 0); } /****************************************************************** ** ** void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer) ** ** FILE *my_file - an open file ** char *sectiontitle - string we are searching for ** char *buffer - return's with line starting with sectiontitle ** ** *****************************************************************/ static void AdvanceToSection(FILE *my_file,char *sectiontitle, char *buffer){ findStartsWith(my_file,sectiontitle,buffer); } /******************************************************************* ** ** void read_cdf_header(FILE *infile, cdf_text *mycdf, char* linebuffer) ** ** FILE *infile - pointer to open file presumed to be a CDF file ** cdf_text *mycdf - structure for holding cdf file ** char *linebuffer - a place to store strings that are read in. 
Length
 **                    is given by BUFFER_SIZE
 **
 ** Fills mycdf->header from the [Chip] section. The entries are looked
 ** up one after another in the order Name, Rows, Cols, NumberOfUnits,
 ** MaxUnit, NumQCUnits, ChipReference, each search continuing from
 ** where the previous one stopped.
 **
 *******************************************************************/

static void read_cdf_header(FILE *infile,  cdf_text *mycdf, char* linebuffer){

  tokenset *fields;
  cdf_text_header *hdr = &mycdf->header;

  /* everything of interest lives in the Chip section */
  AdvanceToSection(infile,"[Chip]",linebuffer);

  /* chip name: the text after the "=" with line ending removed */
  findStartsWith(infile,"Name",linebuffer);
  fields = tokenize(linebuffer,"=\r\n");
  hdr->name = Calloc(strlen(get_token(fields,1))+1,char);
  strcpy(hdr->name,get_token(fields,1));
  delete_tokens(fields);

  /* the integer valued entries */
  findStartsWith(infile,"Rows",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->rows = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"Cols",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->cols = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"NumberOfUnits",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->numberofunits = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"MaxUnit",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->maxunit = atoi(get_token(fields,1));
  delete_tokens(fields);

  findStartsWith(infile,"NumQCUnits",linebuffer);
  fields = tokenize(linebuffer,"=");
  hdr->NumQCUnits = atoi(get_token(fields,1));
  delete_tokens(fields);

  /* the chip reference may have no value, in which case it is stored as NULL */
  findStartsWith(infile,"ChipReference",linebuffer);
  fields = tokenize(linebuffer,"=\r\n");
  if (fields->n <= 1){
    hdr->chipreference = NULL;
  } else {
    hdr->chipreference = Calloc(strlen(get_token(fields,1))+1,char);
    strcpy(hdr->chipreference,get_token(fields,1));
  }
  delete_tokens(fields);
}


/*******************************************************************
 **
 ** void read_cdf_QCUnits_probes(FILE *infile,  cdf_text *mycdf, char* linebuffer,int index)
 **
 ** FILE *infile - an opened
CDF file ** cdf_text *mycdf - a structure for holding cdf file ** char *linebuffer - temporary place to store lines of text read in ** int index - which QCunit. ** ** This function reads in the QC unit probes from the cdf file. It is assumed that the space to ** store them is already allocated. ** *******************************************************************/ static void read_cdf_QCUnits_probes(FILE *infile, cdf_text *mycdf, char* linebuffer,int index){ tokenset *cur_tokenset; int i; for (i =0; i < mycdf->qc_units[index].n_probes; i++){ ReadFileLine(linebuffer, BUFFER_SIZE, infile); cur_tokenset = tokenize(linebuffer,"=\t\r\n"); if (mycdf->qc_units[index].qccontains[0]){ mycdf->qc_units[index].qc_probes[i].x = atoi(get_token(cur_tokenset,1)); } if (mycdf->qc_units[index].qccontains[1]){ mycdf->qc_units[index].qc_probes[i].y = atoi(get_token(cur_tokenset,2)); } if (mycdf->qc_units[index].qccontains[2]){ mycdf->qc_units[index].qc_probes[i].probe=Calloc(strlen(get_token(cur_tokenset,3))+1,char); strcpy(mycdf->qc_units[index].qc_probes[i].probe,get_token(cur_tokenset,3)); } if (mycdf->qc_units[index].qccontains[3]){ mycdf->qc_units[index].qc_probes[i].plen = atoi(get_token(cur_tokenset,4)); } if (mycdf->qc_units[index].qccontains[4]){ mycdf->qc_units[index].qc_probes[i].atom = atoi(get_token(cur_tokenset,5)); } if (mycdf->qc_units[index].qccontains[5]){ mycdf->qc_units[index].qc_probes[i].index = atoi(get_token(cur_tokenset,6)); } if (mycdf->qc_units[index].qccontains[6]){ mycdf->qc_units[index].qc_probes[i].match = atoi(get_token(cur_tokenset,7)); } if (mycdf->qc_units[index].qccontains[7]){ mycdf->qc_units[index].qc_probes[i].bg = atoi(get_token(cur_tokenset,8)); } delete_tokens(cur_tokenset); } } /******************************************************************* ** ** void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer) ** ** FILE *infile - an opened CDF file ** cdf_text *mycdf - a structure for holding cdf file ** char *linebuffer - 
temporary place to store lines of text read in ** ** Reads all the QC units. Note that it allocates the space for the probes ** it is assumed that the space for the actual QC units are already allocated ** *******************************************************************/ static void read_cdf_QCUnits(FILE *infile, cdf_text *mycdf, char* linebuffer){ tokenset *cur_tokenset; int i,j; mycdf->qc_units = Calloc(mycdf->header.NumQCUnits,cdf_text_qc_unit); for (i =0; i < mycdf->header.NumQCUnits; i++){ /* move to the next QC section */ AdvanceToSection(infile,"[QC",linebuffer); findStartsWith(infile,"Type",linebuffer); cur_tokenset = tokenize(linebuffer,"="); mycdf->qc_units[i].type = (unsigned short)atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); findStartsWith(infile,"NumberCells",linebuffer); cur_tokenset = tokenize(linebuffer,"="); mycdf->qc_units[i].n_probes = atoi(get_token(cur_tokenset,1)); delete_tokens(cur_tokenset); mycdf->qc_units[i].qc_probes = Calloc(mycdf->qc_units[i].n_probes,cdf_text_qc_probe); /* Figure out which fields this QC unit has */ findStartsWith(infile,"CellHeader",linebuffer); cur_tokenset = tokenize(linebuffer,"=\t\r\n"); for (j=1; j < tokenset_size(cur_tokenset); j++){ if(strncmp("X", get_token(cur_tokenset,j), 1) == 0){ mycdf->qc_units[i].qccontains[0] =1; } else if (strncmp("Y", get_token(cur_tokenset,j), 1) == 0){ mycdf->qc_units[i].qccontains[1] =1; } else if (strncmp("PROBE",get_token(cur_tokenset,j), 5) == 0){ mycdf->qc_units[i].qccontains[2] =1; } else if (strncmp("PLEN",get_token(cur_tokenset,j), 4) == 0){ mycdf->qc_units[i].qccontains[3] =1; } else if (strncmp("ATOM",get_token(cur_tokenset,j), 4) == 0){ mycdf->qc_units[i].qccontains[4] =1; } else if (strncmp("INDEX",get_token(cur_tokenset,j), 5) == 0){ mycdf->qc_units[i].qccontains[5] =1; } else if (strncmp("MATCH",get_token(cur_tokenset,j), 5) == 0){ mycdf->qc_units[i].qccontains[6] =1; } else if (strncmp("BG",get_token(cur_tokenset,j), 2) == 0){ 
mycdf->qc_units[i].qccontains[7] =1; } } delete_tokens(cur_tokenset); read_cdf_QCUnits_probes(infile,mycdf,linebuffer,i); } } /******************************************************************* ** ** void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block) ** ** FILE *infile - an opened CDF file ** cdf_text *mycdf - a structure for holding cdf file ** char *linebuffer - temporary place to store lines of text read in from the file ** int unit - which unit ** int block - which block ** ** Reads in the probes for each unit. Note that it is assumed that the ** space for the probes has actually been allocated. ** *******************************************************************/ static void read_cdf_unit_block_probes(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit,int block){ int i; tokenset *cur_tokenset; /* Read the Cell Header for the unit block */ ReadFileLine(linebuffer, BUFFER_SIZE, infile); for (i =0; i < mycdf->units[unit].blocks[block].num_cells; i++){ ReadFileLine(linebuffer, BUFFER_SIZE, infile); cur_tokenset = tokenize(linebuffer,"=\t\r\n"); mycdf->units[unit].blocks[block].probes[i].x = atoi(get_token(cur_tokenset,1)); mycdf->units[unit].blocks[block].probes[i].y = atoi(get_token(cur_tokenset,2)); mycdf->units[unit].blocks[block].probes[i].probe=Calloc(strlen(get_token(cur_tokenset,3))+1,char); strcpy(mycdf->units[unit].blocks[block].probes[i].probe,get_token(cur_tokenset,3)); mycdf->units[unit].blocks[block].probes[i].feat=Calloc(strlen(get_token(cur_tokenset,4))+1,char); strcpy(mycdf->units[unit].blocks[block].probes[i].feat,get_token(cur_tokenset,4)); mycdf->units[unit].blocks[block].probes[i].qual=Calloc(strlen(get_token(cur_tokenset,5))+1,char); strcpy(mycdf->units[unit].blocks[block].probes[i].qual,get_token(cur_tokenset,5)); mycdf->units[unit].blocks[block].probes[i].expos = atoi(get_token(cur_tokenset,6)); mycdf->units[unit].blocks[block].probes[i].pos = atoi(get_token(cur_tokenset,7)); 
mycdf->units[unit].blocks[block].probes[i].cbase = Calloc(strlen(get_token(cur_tokenset,8))+1,char); strcpy(mycdf->units[unit].blocks[block].probes[i].cbase,get_token(cur_tokenset,8)); mycdf->units[unit].blocks[block].probes[i].pbase = Calloc(strlen(get_token(cur_tokenset,9))+1,char); strcpy(mycdf->units[unit].blocks[block].probes[i].pbase,get_token(cur_tokenset,9)); mycdf->units[unit].blocks[block].probes[i].tbase = Calloc(strlen(get_token(cur_tokenset,10))+1,char); strcpy(mycdf->units[unit].blocks[block].probes[i].tbase,get_token(cur_tokenset,10)); mycdf->units[unit].blocks[block].probes[i].atom = atoi(get_token(cur_tokenset,11)); mycdf->units[unit].blocks[block].probes[i].index = atoi(get_token(cur_tokenset,12)); mycdf->units[unit].blocks[block].probes[i].codonid = atoi(get_token(cur_tokenset,13)); mycdf->units[unit].blocks[block].probes[i].codon = atoi(get_token(cur_tokenset,14)); mycdf->units[unit].blocks[block].probes[i].regiontype = atoi(get_token(cur_tokenset,15)); delete_tokens(cur_tokenset); } } /******************************************************************* ** ** void read_cdf_unit_block(FILE *infile, cdf_text *mycdf, char* linebuffer, int unit) ** ** FILE *infile - an opened CDF file ** cdf_text *mycdf - a structure for holding cdf file ** char *linebuffer - temporary place to store lines of text read in from the file ** int unit - which unit ** ** Reads in all the blocks for the unit. Assumes that space for the blocks are allocated ** already. Allocates the space for the probes and calls a function to read them in. 
**
*******************************************************************/

static void read_cdf_unit_block(FILE *infile,  cdf_text *mycdf, char* linebuffer, int unit){

  tokenset *fields;
  int i;
  cdf_text_unit *parent = &mycdf->units[unit];
  cdf_text_unit_block *blk;

  for (i=0; i < parent->numberblocks; i++){
    blk = &parent->blocks[i];

    /* block name: the text after the "=" with line ending removed */
    findStartsWith(infile,"Name",linebuffer);
    fields = tokenize(linebuffer,"=\r\n");
    blk->name = Calloc(strlen(get_token(fields,1))+1,char);
    strcpy(blk->name,get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"BlockNumber",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->blocknumber = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumAtoms",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->num_atoms = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumCells",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->num_cells = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"StartPosition",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->start_position = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"StopPosition",linebuffer);
    fields = tokenize(linebuffer,"=");
    blk->stop_position = atoi(get_token(fields,1));
    delete_tokens(fields);

    /* a Direction entry is only looked for in blocks of type 2 units,
       every other block takes the direction of its unit */
    if (parent->unit_type != 2){
      blk->direction = parent->direction;
    } else {
      findStartsWith(infile,"Direction",linebuffer);
      fields = tokenize(linebuffer,"=");
      blk->direction = atoi(get_token(fields,1));
      delete_tokens(fields);
    }

    blk->probes = Calloc(blk->num_cells,cdf_text_unit_block_probe);

    read_cdf_unit_block_probes(infile,mycdf,linebuffer,unit,i);
  }
}


/*******************************************************************
 **
 ** void read_cdf_Units(FILE *infile,  cdf_text *mycdf, char* linebuffer)
 **
 ** FILE *infile - an opened CDF file
 ** cdf_text *mycdf - a structure for holding cdf file
 ** char *linebuffer - temporary place to store lines of text read in from the file
 **
 ** Allocates header.numberofunits units and fills each of them from
 ** the next "[Unit" section of the file. For every unit the array of
 ** blocks is allocated here and then populated by read_cdf_unit_block.
 **
 *******************************************************************/

static void read_cdf_Units(FILE *infile,  cdf_text *mycdf, char* linebuffer){

  tokenset *fields;
  int i;
  cdf_text_unit *cur;

  mycdf->units = Calloc(mycdf->header.numberofunits,cdf_text_unit);

  for (i =0; i < mycdf->header.numberofunits; i++){
    cur = &mycdf->units[i];

    /* move to the next Unit section */
    AdvanceToSection(infile,"[Unit",linebuffer);

    findStartsWith(infile,"Name",linebuffer);
    fields = tokenize(linebuffer,"=\r\n");
    cur->name = Calloc(strlen(get_token(fields,1))+1,char);
    strcpy(cur->name,get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"Direction",linebuffer);
    fields = tokenize(linebuffer,"=");
    cur->direction = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumAtoms",linebuffer);
    fields = tokenize(linebuffer,"=");
    cur->num_atoms = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumCells",linebuffer);
    fields = tokenize(linebuffer,"=");
    cur->num_cells = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"UnitNumber",linebuffer);
    fields = tokenize(linebuffer,"=");
    cur->unit_number = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"UnitType",linebuffer);
    fields = tokenize(linebuffer,"=");
    cur->unit_type = atoi(get_token(fields,1));
    delete_tokens(fields);

    findStartsWith(infile,"NumberBlocks",linebuffer);
    fields = tokenize(linebuffer,"=");
    cur->numberblocks = atoi(get_token(fields,1));
    delete_tokens(fields);

    /* Skip MutationType since only appears on one type of array */

    cur->blocks = Calloc(cur->numberblocks,cdf_text_unit_block);

    read_cdf_unit_block(infile,mycdf,linebuffer,i);
  }
}


/*******************************************************************
 **
 ** int read_cdf_text(const char *filename, cdf_text *mycdf)
 **
 ** const char *filename - name of text file
 ** cdf_text *mycdf - pointer to root of structure that will contain
 **                   the contents of the CDF file at the conclusion
 **                   of the function.
 **
 ** RETURNS 0 if the function failed, otherwise returns 1
 **
 ** this function reads a text CDF file into C data structure.
** *******************************************************************/ static int read_cdf_text(const char *filename, cdf_text *mycdf){ FILE *infile; char linebuffer[BUFFER_SIZE]; /* a character buffer */ tokenset *cur_tokenset; if ((infile = fopen(filename, "r")) == NULL) { error("Unable to open the file %s",filename); return 0; } /* Check that is is a text CDF file */ ReadFileLine(linebuffer, BUFFER_SIZE, infile); if (strncmp("[CDF]", linebuffer, 5) != 0){ error("The file %s does not look like a text CDF file",filename); } /* Read the version number */ ReadFileLine(linebuffer, BUFFER_SIZE, infile); cur_tokenset = tokenize(linebuffer,"=\r\n"); if (strncmp("GC3.0", get_token(cur_tokenset,1), 5) != 0){ error("The file %s does not look like a version GC3.0 CDF file",filename); } else { mycdf->header.version = Calloc(strlen(get_token(cur_tokenset,1))+1,char); strcpy(mycdf->header.version,get_token(cur_tokenset,1)); } delete_tokens(cur_tokenset); read_cdf_header(infile,mycdf,linebuffer); read_cdf_QCUnits(infile,mycdf,linebuffer); read_cdf_Units(infile,mycdf,linebuffer); return 1; } /******************************************************************* ** ** void dealloc_cdf_text(cdf_text *my_cdf) ** ** Frees memory allocated ** ******************************************************************/ static void dealloc_cdf_text(cdf_text *my_cdf){ int i,j,k; Free(my_cdf->header.version); Free(my_cdf->header.name); if (my_cdf->header.chipreference != NULL) Free(my_cdf->header.chipreference); for (i =0; i < my_cdf->header.NumQCUnits; i++){ for (j=0; j < my_cdf->qc_units[i].n_probes; j++){ Free(my_cdf->qc_units[i].qc_probes[j].probe); } Free(my_cdf->qc_units[i].qc_probes); } for (i =0; i < my_cdf->header.numberofunits; i++){ for (j=0; j < my_cdf->units[i].numberblocks; j++){ for (k=0; k < my_cdf->units[i].blocks[j].num_cells;k++){ Free(my_cdf->units[i].blocks[j].probes[k].probe); Free(my_cdf->units[i].blocks[j].probes[k].feat); Free(my_cdf->units[i].blocks[j].probes[k].qual); 
Free(my_cdf->units[i].blocks[j].probes[k].cbase); Free(my_cdf->units[i].blocks[j].probes[k].pbase); Free(my_cdf->units[i].blocks[j].probes[k].tbase); } Free(my_cdf->units[i].blocks[j].probes); Free(my_cdf->units[i].blocks[j].name); } Free(my_cdf->units[i].blocks); Free(my_cdf->units[i].name); } } /******************************************************************* ** ** static int isTextCDFFile(const char *filename) ** ** const char *filename - name of file to check ** ** checks whether the supplied file is a text CDF file or not. ** uses a very simple test. ** ** Attempts to open the supplied filename. Then checks to see if the first ** 5 characters are "[CDF]" if so returns 1, otherwise 0. ** ** ******************************************************************/ static int isTextCDFFile(const char *filename){ FILE *infile; char linebuffer[BUFFER_SIZE]; /* a character buffer */ if ((infile = fopen(filename, "r")) == NULL) { error("Unable to open the file %s",filename); } /* Check that is is a text CDF file */ ReadFileLine(linebuffer, BUFFER_SIZE, infile); if (strncmp("[CDF]", linebuffer, 5) == 0){ fclose(infile); return 1; } fclose(infile); return 0; } /******************************************************************* ** ** SEXP ReadtextCDFFileIntoRList(SEXP filename) ** ** SEXP filename - name of cdffile. Should be full path to file. ** ** this function should be called from R. When supplied the name ** of a text cdf file it first parses it into a C data structure. ** ** An R list structure is then constructed from the C data structure ** ** The R list is then returned. ** ** Note no special effort is made to reduce down the information in ** the text CDF file. Instead almost everything is returned, even ** somewhat redundant information. 
** ******************************************************************/ SEXP ReadtextCDFFileIntoRList(SEXP filename){ SEXP CDFInfo; /* this is the object that will be returned */ SEXP CDFInfoNames; SEXP HEADER; /* The file header */ SEXP HEADERNames; SEXP TEMPSXP; SEXP TEMPSXP2; SEXP TEMPSXP3; SEXP TEMPSXP4; SEXP QCUNITS; SEXP UNITS; /* Basically fields (possible) for QC probes */ SEXP QCUNITSProbeInfoX; SEXP QCUNITSProbeInfoY; SEXP QCUNITSProbeInfoPROBE; SEXP QCUNITSProbeInfoPL; SEXP QCUNITSProbeInfoATOM; SEXP QCUNITSProbeInfoINDEX; SEXP QCUNITSProbeInfoPMFLAG; SEXP QCUNITSProbeInfoBGFLAG; SEXP QCUNITSProbeInfoNames = R_NilValue; SEXP QCUNITSProbeInforow_names; /* Basically fields (possible) for Unit Block probes */ SEXP UNITSProbeInfoX; SEXP UNITSProbeInfoY; SEXP UNITSProbeInfoPROBE; SEXP UNITSProbeInfoFEAT; SEXP UNITSProbeInfoQUAL; SEXP UNITSProbeInfoEXPOS; SEXP UNITSProbeInfoPOS; SEXP UNITSProbeInfoCBASE; SEXP UNITSProbeInfoPBASE; SEXP UNITSProbeInfoTBASE; SEXP UNITSProbeInfoATOM; SEXP UNITSProbeInfoINDEX; SEXP UNITSProbeInfoCODONIND; SEXP UNITSProbeInfoCODON; SEXP UNITSProbeInfoREGIONTYPE; SEXP UNITSProbeInfoNames; SEXP UNITSProbeInforow_names; char buf[11]; /* temporary buffer for making names */ int i,j,k,l; int tmpsum =0; cdf_text my_cdf; const char *cur_file_name; cur_file_name = CHAR(STRING_ELT(filename,0)); if(!read_cdf_text(cur_file_name, &my_cdf)){ error("Problem reading text cdf file %s. 
Possibly corrupted or truncated?\n",cur_file_name); } /* Now build the R list structure */ /* return the full structure */ PROTECT(CDFInfo = allocVector(VECSXP,3)); PROTECT(CDFInfoNames = allocVector(STRSXP,3)); SET_STRING_ELT(CDFInfoNames,0,mkChar("Chip")); SET_STRING_ELT(CDFInfoNames,1,mkChar("QC")); SET_STRING_ELT(CDFInfoNames,2,mkChar("Unit")); setAttrib(CDFInfo,R_NamesSymbol,CDFInfoNames); UNPROTECT(1); /* Deal with the HEADER */ PROTECT(HEADER = allocVector(VECSXP,8)); PROTECT(HEADERNames = allocVector(STRSXP,8)); SET_STRING_ELT(HEADERNames,0,mkChar("Version")); SET_STRING_ELT(HEADERNames,1,mkChar("Name")); SET_STRING_ELT(HEADERNames,2,mkChar("Rows")); SET_STRING_ELT(HEADERNames,3,mkChar("Cols")); SET_STRING_ELT(HEADERNames,4,mkChar("NumberOfUnits")); SET_STRING_ELT(HEADERNames,5,mkChar("MaxUnit")); SET_STRING_ELT(HEADERNames,6,mkChar("NumQCUnits")); SET_STRING_ELT(HEADERNames,7,mkChar("ChipReference")); setAttrib(HEADER,R_NamesSymbol,HEADERNames); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(STRSXP,1)); SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.version)); SET_VECTOR_ELT(HEADER,0,TEMPSXP); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(STRSXP,1)); SET_STRING_ELT(TEMPSXP,0,mkChar(my_cdf.header.name)); SET_VECTOR_ELT(HEADER,1,TEMPSXP); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(REALSXP,1)); NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.rows; SET_VECTOR_ELT(HEADER,2,TEMPSXP); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(REALSXP,1)); NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.cols; SET_VECTOR_ELT(HEADER,3,TEMPSXP); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(REALSXP,1)); NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.numberofunits; SET_VECTOR_ELT(HEADER,4,TEMPSXP); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(REALSXP,1)); NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.maxunit; SET_VECTOR_ELT(HEADER,5,TEMPSXP); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(REALSXP,1)); NUMERIC_POINTER(TEMPSXP)[0] = (double)my_cdf.header.NumQCUnits; 
SET_VECTOR_ELT(HEADER,6,TEMPSXP); UNPROTECT(1); PROTECT(TEMPSXP = allocVector(REALSXP,1)); if (my_cdf.header.chipreference !=NULL){ SET_VECTOR_ELT(TEMPSXP,0,mkChar(my_cdf.header.chipreference)); SET_VECTOR_ELT(HEADER,7,TEMPSXP); } UNPROTECT(1); SET_VECTOR_ELT(CDFInfo,0,HEADER); PROTECT(QCUNITS = allocVector(VECSXP,my_cdf.header.NumQCUnits)); for (i=0; i ** ** Aim is to read in Affymetrix CEL files in the ** "Command Console Generic Data" File Format ** This format is sometimes known as the Calvin format ** ** As with other file format functionality in affyio ** gzipped files are accepted. ** ** The implementation here is based upon openly available ** file format information. The code here is not dependent or based ** in anyway on that in the Fusion SDK. ** ** ** History ** Sept 3, 2007 -Initial version ** Sept 9, 2007 - fix compiler warnings ** Oct 11, 2007 - fix missing DatHeader problem ** Feb 11, 2008 - add #include for inttypes.h in situations that stdint.h might not exist ** Feb 13, 2008 - fix problems with generic_get_detailed_header_info(), gzgeneric_get_detailed_header_info() ** May 18, 2009 - Add Ability to extract scan date from CEL file header ** Sep 19, 2013 - Improve ability to deal with large 64bit matrices ** Sept 4, 2017 - change gzFile * to gzFile ** *************************************************************/ #include #include #include #include #ifdef HAVE_STDINT_H #include #elif HAVE_INTTYPES_H #include #endif #include #include #include #include #include "read_generic.h" #include "read_celfile_generic.h" #include "read_abatch.h" int isGenericCelFile(const char *filename){ FILE *infile; generic_file_header file_header; generic_data_header data_header; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } if (!read_generic_file_header(&file_header,infile)){ fclose(infile); return 0; } if (!read_generic_data_header(&data_header,infile)){ Free_generic_data_header(&data_header); fclose(infile); 
return 0; } if (strcmp(data_header.data_type_id.value, "affymetrix-calvin-intensity") !=0){ Free_generic_data_header(&data_header); fclose(infile); return 0; } Free_generic_data_header(&data_header); fclose(infile); return 1; } char *generic_get_header_info(const char *filename, int *dim1, int *dim2){ FILE *infile; generic_file_header file_header; generic_data_header data_header; char *cdfName = 0; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; int size; wchar_t *wchartemp=0; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } read_generic_file_header(&file_header,infile); read_generic_data_header(&data_header,infile); /* affymetrix-array-type text/plainText/plain String is HG-U133_Plus_2 Now Trying it again. But using exposed function Its a Text/plain string value is HG-U133_Plus_2 with size 14 affymetrix-cel-cols text/x-calvin-integer-32Its a int32_t value is 1164 Now Trying it again. But using exposed function Its a int32_t value is 1164 affymetrix-cel-rows text/x-calvin-integer-32Its a int32_t value is 1164 */ triplet = find_nvt(&data_header,"affymetrix-array-type"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); cdfName = Calloc(size + 1, char); wcstombs(cdfName, wchartemp, size); Free(wchartemp); triplet = find_nvt(&data_header,"affymetrix-cel-cols"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, dim1, &size); triplet = find_nvt(&data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, dim2, &size); Free_generic_data_header(&data_header); fclose(infile); return cdfName; } void generic_get_detailed_header_info(const char *filename, detailed_header_info *header_info){ FILE *infile; generic_file_header file_header; generic_data_header data_header; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; int size; int 
algorithm_paramsize = 0; float tempfloat; wchar_t *wchartemp=0; char *chartemp=0; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); } read_generic_file_header(&file_header,infile); read_generic_data_header(&data_header,infile); triplet = find_nvt(&data_header,"affymetrix-array-type"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->cdfName = Calloc(size + 1, char); wcstombs(header_info->cdfName, wchartemp, size); Free(wchartemp); triplet = find_nvt(&data_header,"affymetrix-cel-cols"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &(header_info->cols), &size); triplet = find_nvt(&data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &(header_info->rows), &size); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridULX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerULx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridULY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerULy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridURX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerURx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridURY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerURy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLLX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, 
&tempfloat, &size); header_info->GridCornerLLx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLLY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerLLy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLRX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerLRx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLRY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerLRy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-dat-header"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->DatHeader = Calloc(size + 1, char); wcstombs(header_info->DatHeader, wchartemp, size); Free(wchartemp); } else { header_info->DatHeader = Calloc(2, char); } triplet = find_nvt(&data_header,"affymetrix-scan-date"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->ScanDate = Calloc(size + 1, char); wcstombs(header_info->ScanDate, wchartemp, size); Free(wchartemp); } else { header_info->ScanDate = Calloc(2, char); } triplet = find_nvt(&data_header,"affymetrix-algorithm-name"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->Algorithm = Calloc(size + 1, char); wcstombs(header_info->Algorithm, wchartemp, size); Free(wchartemp); // 
"Percentile:75;CellMargin:2;OutlierHigh:1.500;OutlierLow:1.004;AlgVersion:6.0;FixedCellSize:TRUE;FullFeatureWidth:7;FullFeatureHeight:7;IgnoreOutliersInShiftRows:FALSE;FeatureExtraction:TRUE;PoolWidthExtenstion:2;PoolHeightExtension:2;UseSubgrids:FALSE;RandomizePixels:FALSE;ErrorBasis:StdvMean;StdMult:1.00000" algorithm_paramsize = 0; header_info->AlgorithmParameters = Calloc(11, char); strncpy(header_info->AlgorithmParameters,"Percentile:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-Percentile"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"CellMargin:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-CellMargin"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = 
Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 12, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"OutlierHigh:",12); algorithm_paramsize+=12; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-OutlierHigh"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"OutlierLow:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-OutlierLow"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; 
header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"AlgVersion:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-AlgVersion"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 14, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FixedCellSize:",14); algorithm_paramsize+=14; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FixedCellSize"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = 
Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 17, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FullFeatureWidth:",17); algorithm_paramsize+=17; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FullFeatureWidth"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 18, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FullFeatureHeight:",18); algorithm_paramsize+=18; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FullFeatureHeight"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 26, char); 
strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"IgnoreOutliersInShiftRows:",26); algorithm_paramsize+=26; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-IgnoreOutliersInShiftRows"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 18, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FeatureExtraction:",18); algorithm_paramsize+=18; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FeatureExtraction"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 20, char); 
strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"PoolWidthExtenstion:",20); algorithm_paramsize+=20; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-PoolWidthExtenstion"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 20, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"PoolHeightExtension:",20); algorithm_paramsize+=20; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-PoolHeightExtension"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 12, char); 
strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"UseSubgrids:",12); algorithm_paramsize+=12; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-UseSubgrids"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 16, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"RandomizePixels:",16); algorithm_paramsize+=16; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-RandomizePixels"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"ErrorBasis:",11); 
algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-ErrorBasis"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 8, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"StdMult:",8); algorithm_paramsize+=8; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-StdMult"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); header_info->AlgorithmParameters[algorithm_paramsize] ='\0'; Free_generic_data_header(&data_header); fclose(infile); } int check_generic_cel_file(const char *filename, const char *ref_cdfName, 
int ref_dim_1, int ref_dim_2){ char *cdfName =0; int dim1, dim2; FILE *infile; generic_file_header file_header; generic_data_header data_header; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; int size; wchar_t *wchartemp=0; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } read_generic_file_header(&file_header,infile); read_generic_data_header(&data_header,infile); triplet = find_nvt(&data_header,"affymetrix-array-type"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); cdfName = Calloc(size + 1, char); wcstombs(cdfName, wchartemp, size); Free(wchartemp); triplet = find_nvt(&data_header,"affymetrix-cel-cols"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &dim1, &size); triplet = find_nvt(&data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &dim2, &size); Free_generic_data_header(&data_header); if ((dim1 != ref_dim_1) || (dim2 != ref_dim_2)){ error("Cel file %s does not seem to have the correct dimensions",filename); } if (strncasecmp(cdfName,ref_cdfName,strlen(ref_cdfName)) != 0){ error("Cel file %s does not seem to be of %s type",filename,ref_cdfName); } Free(cdfName); fclose(infile); return 0; } /*************************************************************** ** ** static int read_binarycel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads binary cel file intensities into the data matrix ** **************************************************************/ int read_genericcel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set 
my_data_set; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); read_generic_data_group(&my_data_group,infile); read_generic_data_set(&my_data_set,infile); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } fclose(infile); Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); return(0); } int read_genericcel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); read_generic_data_group(&my_data_group,infile); read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); read_generic_data_set(&my_data_set,infile); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); fclose(infile); return(0); } int read_genericcel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; if 
((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); read_generic_data_group(&my_data_group,infile); read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); read_generic_data_set(&my_data_set,infile); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((short *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); fclose(infile); return(0); } void generic_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y){ int i=0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); read_generic_data_group(&my_data_group,infile); /* passing the intensities */ read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the stddev */ read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the npixels */ read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Outlier" */ 
read_generic_data_set(&my_data_set,infile); *noutliers = my_data_set.nrows; *outliers_x = Calloc(my_data_set.nrows,short); *outliers_y = Calloc(my_data_set.nrows,short); read_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ (*outliers_x)[i] = ((short *)my_data_set.Data[0])[i]; (*outliers_y)[i] = ((short *)my_data_set.Data[1])[i]; } fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Mask" */ read_generic_data_set(&my_data_set,infile); *nmasks = my_data_set.nrows; *masks_x = Calloc(my_data_set.nrows,short); *masks_y = Calloc(my_data_set.nrows,short); read_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ (*outliers_x)[i] = ((short *)my_data_set.Data[0])[i]; (*outliers_y)[i] = ((short *)my_data_set.Data[1])[i]; } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); fclose(infile); } void generic_apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, int rm_mask, int rm_outliers){ size_t i=0; size_t cur_index; short cur_x, cur_y; int nrows; int size; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); read_generic_data_group(&my_data_group,infile); triplet = find_nvt(&my_data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &nrows, &size); /* passing the intensities */ read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by 
the stddev */ read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the npixels */ read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Outlier" */ read_generic_data_set(&my_data_set,infile); if (rm_outliers){ read_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ cur_x = ((short *)my_data_set.Data[0])[i]; cur_y = ((short *)my_data_set.Data[1])[i]; cur_index = (int)cur_x + nrows*(int)cur_y; intensity[chip_num*rows + cur_index] = R_NaN; } } fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Mask" */ read_generic_data_set(&my_data_set,infile); if (rm_mask){ read_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ cur_x = ((short *)my_data_set.Data[0])[i]; cur_y = ((short *)my_data_set.Data[1])[i]; cur_index = (int)cur_x + nrows*(int)cur_y; intensity[chip_num*rows + cur_index] = R_NaN; } } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); fclose(infile); } /******************************************************************************************************* ******************************************************************************************************* ** ** Code below supports gzipped command console format CEL files ** ******************************************************************************************************* *******************************************************************************************************/ int isgzGenericCelFile(const char *filename){ gzFile infile; generic_file_header file_header; generic_data_header data_header; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } if 
(!gzread_generic_file_header(&file_header,infile)){ gzclose(infile); return 0; } if (!gzread_generic_data_header(&data_header,infile)){ Free_generic_data_header(&data_header); gzclose(infile); return 0; } if (strcmp(data_header.data_type_id.value, "affymetrix-calvin-intensity") !=0){ Free_generic_data_header(&data_header); gzclose(infile); return 0; } Free_generic_data_header(&data_header); gzclose(infile); return 1; } char *gzgeneric_get_header_info(const char *filename, int *dim1, int *dim2){ gzFile infile; generic_file_header file_header; generic_data_header data_header; char *cdfName = 0; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; int size; wchar_t *wchartemp=0; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } gzread_generic_file_header(&file_header,infile); gzread_generic_data_header(&data_header,infile); /* affymetrix-array-type text/plainText/plain String is HG-U133_Plus_2 Now Trying it again. But using exposed function Its a Text/plain string value is HG-U133_Plus_2 with size 14 affymetrix-cel-cols text/x-calvin-integer-32Its a int32_t value is 1164 Now Trying it again. 
But using exposed function Its a int32_t value is 1164 affymetrix-cel-rows text/x-calvin-integer-32Its a int32_t value is 1164 */ triplet = find_nvt(&data_header,"affymetrix-array-type"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); cdfName = Calloc(size + 1, char); wcstombs(cdfName, wchartemp, size); Free(wchartemp); triplet = find_nvt(&data_header,"affymetrix-cel-cols"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, dim1, &size); triplet = find_nvt(&data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, dim2, &size); Free_generic_data_header(&data_header); gzclose(infile); return cdfName; } void gzgeneric_get_detailed_header_info(const char *filename, detailed_header_info *header_info){ gzFile infile; generic_file_header file_header; generic_data_header data_header; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; int size; int algorithm_paramsize = 0; float tempfloat; wchar_t *wchartemp = 0; char *chartemp = 0; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); } gzread_generic_file_header(&file_header,infile); gzread_generic_data_header(&data_header,infile); triplet = find_nvt(&data_header,"affymetrix-array-type"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->cdfName = Calloc(size + 1, char); wcstombs(header_info->cdfName, wchartemp, size); Free(wchartemp); triplet = find_nvt(&data_header,"affymetrix-cel-cols"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &(header_info->cols), &size); triplet = find_nvt(&data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &(header_info->rows), &size); triplet = 
find_nvt(&data_header,"affymetrix-algorithm-param-GridULX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerULx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridULY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerULy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridURX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerURx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridURY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerURy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLLX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerLLx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLLY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerLLy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLRX"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerLRx = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-algorithm-param-GridLRY"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &tempfloat, &size); header_info->GridCornerLRy = (int)(tempfloat +0.5); triplet = find_nvt(&data_header,"affymetrix-dat-header"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = 
decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->DatHeader = Calloc(size + 1, char); wcstombs(header_info->DatHeader, wchartemp, size); Free(wchartemp); } else { header_info->DatHeader = Calloc(2, char); } triplet = find_nvt(&data_header,"affymetrix-scan-date"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->ScanDate = Calloc(size + 1, char); wcstombs(header_info->ScanDate, wchartemp, size); Free(wchartemp); } else { header_info->ScanDate = Calloc(2, char); } triplet = find_nvt(&data_header,"affymetrix-algorithm-name"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); header_info->Algorithm = Calloc(size + 1, char); wcstombs(header_info->Algorithm, wchartemp, size); Free(wchartemp); // "Percentile:75;CellMargin:2;OutlierHigh:1.500;OutlierLow:1.004;AlgVersion:6.0;FixedCellSize:TRUE;FullFeatureWidth:7;FullFeatureHeight:7;IgnoreOutliersInShiftRows:FALSE;FeatureExtraction:TRUE;PoolWidthExtenstion:2;PoolHeightExtension:2;UseSubgrids:FALSE;RandomizePixels:FALSE;ErrorBasis:StdvMean;StdMult:1.00000" algorithm_paramsize = 0; header_info->AlgorithmParameters = Calloc(11, char); strncpy(header_info->AlgorithmParameters,"Percentile:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-Percentile"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, 
algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"CellMargin:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-CellMargin"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 12, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"OutlierHigh:",12); algorithm_paramsize+=12; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-OutlierHigh"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; 
header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"OutlierLow:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-OutlierLow"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"AlgVersion:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-AlgVersion"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = 
Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 14, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FixedCellSize:",14); algorithm_paramsize+=14; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FixedCellSize"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 17, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FullFeatureWidth:",17); algorithm_paramsize+=17; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FullFeatureWidth"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 18, char); 
strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FullFeatureHeight:",18); algorithm_paramsize+=18; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FullFeatureHeight"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 26, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"IgnoreOutliersInShiftRows:",26); algorithm_paramsize+=26; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-IgnoreOutliersInShiftRows"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 18, char); 
strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"FeatureExtraction:",18); algorithm_paramsize+=18; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-FeatureExtraction"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 20, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"PoolWidthExtenstion:",20); algorithm_paramsize+=20; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-PoolWidthExtenstion"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 20, char); 
strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"PoolHeightExtension:",20); algorithm_paramsize+=20; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-PoolHeightExtension"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 12, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"UseSubgrids:",12); algorithm_paramsize+=12; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-UseSubgrids"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 16, char); 
strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"RandomizePixels:",16); algorithm_paramsize+=16; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-RandomizePixels"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 11, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"ErrorBasis:",11); algorithm_paramsize+=11; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-ErrorBasis"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 8, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize],"StdMult:",8); 
algorithm_paramsize+=8; triplet = find_nvt(&data_header,"affymetrix-algorithm-param-StdMult"); if (triplet != NULL){ cur_mime_type = determine_MIMETYPE(*triplet); chartemp = decode_MIME_value_toASCII(*triplet,cur_mime_type, chartemp, &size); header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + size +1, char); strncpy(&header_info->AlgorithmParameters[algorithm_paramsize], chartemp, size); algorithm_paramsize+= size +1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; Free(chartemp); } else { header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); algorithm_paramsize+= 1; header_info->AlgorithmParameters[algorithm_paramsize-1]=';'; } header_info->AlgorithmParameters = Realloc(header_info->AlgorithmParameters, algorithm_paramsize + 1, char); header_info->AlgorithmParameters[algorithm_paramsize] ='\0'; Free_generic_data_header(&data_header); gzclose(infile); } int check_gzgeneric_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2){ char *cdfName =0; int dim1, dim2; gzFile infile; generic_file_header file_header; generic_data_header data_header; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; int size; wchar_t *wchartemp=0; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s",filename); return 0; } gzread_generic_file_header(&file_header,infile); gzread_generic_data_header(&data_header,infile); triplet = find_nvt(&data_header,"affymetrix-array-type"); cur_mime_type = determine_MIMETYPE(*triplet); wchartemp = decode_MIME_value(*triplet,cur_mime_type, wchartemp, &size); cdfName = Calloc(size + 1, char); wcstombs(cdfName, wchartemp, size); Free(wchartemp); triplet = find_nvt(&data_header,"affymetrix-cel-cols"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &dim1, &size); triplet = find_nvt(&data_header,"affymetrix-cel-rows"); cur_mime_type = 
determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &dim2, &size); Free_generic_data_header(&data_header); if ((dim1 != ref_dim_1) || (dim2 != ref_dim_2)){ error("Cel file %s does not seem to have the correct dimensions",filename); } if (strncasecmp(cdfName,ref_cdfName,strlen(ref_cdfName)) != 0){ error("Cel file %s does not seem to be of %s type",filename,ref_cdfName); } Free(cdfName); gzclose(infile); return 0; } int gzread_genericcel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); gzread_generic_data_group(&my_data_group,infile); gzread_generic_data_set(&my_data_set,infile); gzread_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } gzclose(infile); Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); return(0); } int gzread_genericcel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); gzread_generic_data_group(&my_data_group,infile); gzread_generic_data_set(&my_data_set,infile); 
gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); gzread_generic_data_set(&my_data_set,infile); gzread_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); return(0); } int gzread_genericcel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows){ size_t i=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); gzread_generic_data_group(&my_data_group,infile); gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); gzread_generic_data_set(&my_data_set,infile); gzread_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((short *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); return(0); } void gzgeneric_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y){ int i=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; if 
((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); gzread_generic_data_group(&my_data_group,infile); /* passing the intensities */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the stddev */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the npixels */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Outlier" */ gzread_generic_data_set(&my_data_set,infile); *noutliers = my_data_set.nrows; *outliers_x = Calloc(my_data_set.nrows,short); *outliers_y = Calloc(my_data_set.nrows,short); gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ (*outliers_x)[i] = ((short *)my_data_set.Data[0])[i]; (*outliers_y)[i] = ((short *)my_data_set.Data[1])[i]; } gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Mask" */ gzread_generic_data_set(&my_data_set,infile); *nmasks = my_data_set.nrows; *masks_x = Calloc(my_data_set.nrows,short); *masks_y = Calloc(my_data_set.nrows,short); gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ (*outliers_x)[i] = ((short *)my_data_set.Data[0])[i]; (*outliers_y)[i] = ((short *)my_data_set.Data[1])[i]; } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); } void gzgeneric_apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, int rm_mask, int rm_outliers){ size_t i=0; size_t cur_index; short cur_x, cur_y; int 
nrows; int size; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); gzread_generic_data_group(&my_data_group,infile); triplet = find_nvt(&my_data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &nrows, &size); /* passing the intensities */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the stddev */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the npixels */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Outlier" */ gzread_generic_data_set(&my_data_set,infile); if (rm_outliers){ gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ cur_x = ((short *)my_data_set.Data[0])[i]; cur_y = ((short *)my_data_set.Data[1])[i]; cur_index = (int)cur_x + nrows*(int)cur_y; intensity[chip_num*rows + cur_index] = R_NaN; } } gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Mask" */ gzread_generic_data_set(&my_data_set,infile); if (rm_mask){ gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ cur_x = ((short *)my_data_set.Data[0])[i]; cur_y = ((short *)my_data_set.Data[1])[i]; cur_index = (int)cur_x + nrows*(int)cur_y; intensity[chip_num*rows + cur_index] = R_NaN; } } Free_generic_data_set(&my_data_set); 
Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); } affyio/src/read_celfile_generic.h0000644000175400017540000000430513556116171020117 0ustar00biocbuildbiocbuild#ifndef READ_CELFILE_GENERIC_H #define READ_CELFILE_GENERIC_H #include "read_abatch.h" int isGenericCelFile(const char *filename); char *generic_get_header_info(const char *filename, int *dim1, int *dim2); void generic_get_detailed_header_info(const char *filename, detailed_header_info *header_info); int read_genericcel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows); int check_generic_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2); int read_genericcel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows); int read_genericcel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows); void generic_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y); void generic_apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, int rm_mask, int rm_outliers); int isgzGenericCelFile(const char *filename); char *gzgeneric_get_header_info(const char *filename, int *dim1, int *dim2); void gzgeneric_get_detailed_header_info(const char *filename, detailed_header_info *header_info); int gzread_genericcel_file_intensities(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows); int check_gzgeneric_cel_file(const char *filename, const char *ref_cdfName, int ref_dim_1, int ref_dim_2); int gzread_genericcel_file_stddev(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows); int 
gzread_genericcel_file_npixels(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows); void gzgeneric_get_masks_outliers(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y); void gzgeneric_apply_masks(const char *filename, double *intensity, size_t chip_num, size_t rows, size_t cols, size_t chip_dim_rows, int rm_mask, int rm_outliers); #endif affyio/src/read_clf.c0000644000175400017540000005600413556116171015562 0ustar00biocbuildbiocbuild/****************************************************************** ** ** file: read_clf.c ** ** Aim: implement parsing of CLF format files ** ** Copyright (C) 2007-2008 B. M. Bolstad ** ** Created on Nov 4, 2007 ** ** History ** Dec 14, 2007 - Initial version ** Dec 31, 2007 - Add function for checking that required headers were found ** Jan 2, 2008 - port x,y to probe_id and probe_id to x,y functions from RMAExpress parsers ** Mar 18, 2008 - fix error in read_clf_header function ** ** ** ******************************************************************/ #include #include #include #define BUFFERSIZE 1024 /******************************************************************* ******************************************************************* ** ** Structures for dealing with clf file information ** ** ** ******************************************************************* ******************************************************************/ /******************************************************************* ******************************************************************* ** ** Starting off with the headers ** ******************************************************************* ******************************************************************/ /* integer (from 0 to n-1) indicates position of header (-1 means header is not present) */ typedef struct{ int probe_id; int x; int y; } header_0; 
/******************************************************************* ** ** These are all the headers that appear in CLF files ** ** Note that some are required (chip_type, lib_set_name, lib_set_version, clf_format_version ** rows, cols, header0) ** While others are optional (sequential, order, create_date, guid and others) ** ** *******************************************************************/ typedef struct{ char **chip_type; int n_chip_type; char *lib_set_name; char *lib_set_version; char *clf_format_version; int rows; int cols; char *header0_str; header_0 *header0; int sequential; char *order; char *create_date; char *guid; char **other_headers_keys; char **other_headers_values; int n_other_headers; } clf_headers; /******************************************************************* ******************************************************************* ** ** Now the actual data ** ** (only store the probeset ids to save space) ** ** length of probe_id is rows*cols. ** ** Given an x, y it maps to probe_id[index] ** ** index = y*cols + x ** ** Which means that given an index, it maps to ** ** x = index % cols where % means modulo (ie remainder) ** y = index / cols ** ** ** ******************************************************************* ******************************************************************/ typedef struct{ int *probe_id; } clf_data; /******************************************************************* ******************************************************************* ** ** Structure for storing clf file (after it is read from file) ** ******************************************************************* ******************************************************************/ typedef struct{ clf_headers *headers; clf_data *data; } clf_file; /******************************************************************* ******************************************************************* ** ** ** Code for splitting a string into a series of tokens ** ** 
******************************************************************* *******************************************************************/ /*************************************************************** ** ** tokenset ** ** char **tokens - a array of token strings ** int n - number of tokens in this set. ** ** a structure to hold a set of tokens. Typically a tokenset is ** created by breaking a character string based upon a set of ** delimiters. ** ** **************************************************************/ typedef struct{ char **tokens; int n; } tokenset; /****************************************************************** ** ** tokenset *tokenize(char *str, char *delimiters) ** ** char *str - a string to break into tokens ** char *delimiters - delimiters to use in breaking up the line ** ** ** RETURNS a new tokenset ** ** Given a string, split into tokens based on a set of delimitors ** *****************************************************************/ static tokenset *tokenize(char *str, char *delimiters){ #if USE_PTHREADS char *tmp_pointer; #endif int i=0; char *current_token; tokenset *my_tokenset = Calloc(1,tokenset); my_tokenset->n=0; my_tokenset->tokens = NULL; #if USE_PTHREADS current_token = strtok_r(str,delimiters,&tmp_pointer); #else current_token = strtok(str,delimiters); #endif while (current_token != NULL){ my_tokenset->n++; my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*); my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char); strcpy(my_tokenset->tokens[i],current_token); my_tokenset->tokens[i][(strlen(current_token))] = '\0'; i++; #if USE_PTHREADS current_token = strtok_r(NULL,delimiters,&tmp_pointer); #else current_token = strtok(NULL,delimiters); #endif } return my_tokenset; } /****************************************************************** ** ** int tokenset_size(tokenset *x) ** ** tokenset *x - a tokenset ** ** RETURNS the number of tokens in the tokenset ** 
******************************************************************/ static int tokenset_size(tokenset *x){ return x->n; } /****************************************************************** ** ** char *get_token(tokenset *x, int i) ** ** tokenset *x - a tokenset ** int i - index of the token to return ** ** RETURNS pointer to the i'th token ** ******************************************************************/ static char *get_token(tokenset *x,int i){ return x->tokens[i]; } /****************************************************************** ** ** void delete_tokens(tokenset *x) ** ** tokenset *x - a tokenset ** ** Deallocates all the space allocated for a tokenset ** ******************************************************************/ static void delete_tokens(tokenset *x){ int i; for (i=0; i < x->n; i++){ Free(x->tokens[i]); } Free(x->tokens); Free(x); } /******************************************************************* ** ** int token_ends_with(char *token, char *ends) ** ** char *token - a string to check ** char *ends_in - we are looking for this string at the end of token ** ** ** returns 0 if no match, otherwise it returns the index of the first character ** which matchs the start of *ends. ** ** Note that there must be one additional character in "token" beyond ** the characters in "ends". So ** ** *token = "TestStr" ** *ends = "TestStr" ** ** would return 0 but if ** ** ends = "estStr" ** ** we would return 1. 
** ** and if ** ** ends= "stStr" ** we would return 2 .....etc ** ** ******************************************************************/ /* static int token_ends_with(char *token, char *ends_in){ int tokenlength = strlen(token); int ends_length = strlen(ends_in); int start_pos; char *tmp_ptr; if (tokenlength <= ends_length){ // token string is too short so can't possibly end with ends return 0; } start_pos = tokenlength - ends_length; tmp_ptr = &token[start_pos]; if (strcmp(tmp_ptr,ends_in)==0){ return start_pos; } else { return 0; } } */ /******************************************************************* ******************************************************************* ** ** Code for Reading from file ** ******************************************************************* *******************************************************************/ /**************************************************************** ** ** void ReadFileLine(char *buffer, int buffersize, FILE *currentFile) ** ** char *buffer - place to store contents of the line ** int buffersize - size of the buffer ** FILE *currentFile - FILE pointer to an opened CEL file. ** ** Read a line from a file, into a buffer of specified size. ** otherwise die. ** ***************************************************************/ static int ReadFileLine(char *buffer, int buffersize, FILE *currentFile){ if (fgets(buffer, buffersize, currentFile) == NULL){ return 0; //error("End of file reached unexpectedly. 
Perhaps this file is truncated.\n"); } return 1; } /**************************************************************** **************************************************************** ** ** Code for identifying what type of information is stored in ** the current line ** **************************************************************** ***************************************************************/ /**************************************************************** ** ** static int IsHeaderLine(char *buffer) ** ** char *buffer - contains line to evaluate ** ** Checks whether supplied line is a header line (ie starts with #%) ** ** return 1 (ie true) if header line. 0 otherwise ** ***************************************************************/ static int IsHeaderLine(char *buffer){ if (strncmp("#%",buffer,2) == 0){ return 1; } return 0; } /**************************************************************** ** ** static int IsHeaderLine(char *buffer) ** ** char *buffer - contains line to evaluate ** ** Checks whether supplied line is a comment line (ie starts with #) ** ** ***************************************************************/ /* static int IsCommentLine(char *buffer){ if (strncmp("#",buffer,1) == 0){ return 1; } return 0; } */ /**************************************************************** ** ** void initialize_clf_header(clf_headers *header) ** ** Initialize all the header values ** ** ** ***************************************************************/ void initialize_clf_header(clf_headers *header){ header->chip_type = NULL; header->n_chip_type = 0; header->lib_set_name= NULL; header->lib_set_version= NULL; header->clf_format_version= NULL; header->header0_str= NULL; header->header0= NULL; header->order = NULL; header->create_date= NULL; header->guid= NULL; header->other_headers_keys= NULL; header->other_headers_values= NULL; header->n_other_headers=0; header->rows = -1; header->cols = -1; header->n_other_headers = -1; } 
/**************************************************************** **************************************************************** ** ** Code for reading in clf header ** **************************************************************** ***************************************************************/ static void determine_order_header0(char *header_str, header_0 *header0){ tokenset *cur_tokenset; int i; char *temp_str = Calloc(strlen(header_str) +1, char); strcpy(temp_str,header_str); header0->probe_id = -1; header0->x = -1; header0->y = -1; cur_tokenset = tokenize(temp_str,"\t\r\n"); for (i=0; i < tokenset_size(cur_tokenset); i++){ if (strcmp(get_token(cur_tokenset,i),"probe_id")==0){ header0->probe_id = i; } else if (strcmp(get_token(cur_tokenset,i),"x")==0){ header0->x = i; } else if (strcmp(get_token(cur_tokenset,i),"y")==0){ header0->y = i; } } delete_tokens(cur_tokenset); Free(temp_str); } /**************************************************************** ** ** Validate that required headers are present in file. ** ** Return 0 if an expected header is not present. 
** Returns 1 otherwise (ie everything looks fine) ** ***************************************************************/ static int validate_clf_header(clf_headers *header){ /* check that required headers are all there (have been read) */ if (header->chip_type == NULL) return 0; if (header->lib_set_name == NULL) return 0; if (header->lib_set_version == NULL) return 0; if (header->clf_format_version == NULL) return 0; if (header->header0_str == NULL) return 0; if (header->rows == -1) return 0; if (header->cols == -1) return 0; /* Check that format version is 1.0 (only supported version) */ if (strcmp( header->clf_format_version,"1.0") != 0){ return 0; } /* check that header0, header1, header2 (ie the three levels of headers) have required fields */ if (header->header0->probe_id == -1) return 0; if (header->header0->x == -1) return 0; if (header->header0->y == -1) return 0; return 1; } /**************************************************************** ** ** static FILE *open_clf_file(const char *filename) ** ** Open the CLF to begin reading from it. 
** ***************************************************************/ static FILE *open_clf_file(const char *filename){ const char *mode = "r"; FILE *currentFile = NULL; currentFile = fopen(filename,mode); if (currentFile == NULL){ error("Could not open file %s", filename); } return currentFile; } /**************************************************************** ** ** void read_clf_header(FILE *cur_file, char *buffer, clf_headers *header) ** ** read the CLF header section ** ** ***************************************************************/ void read_clf_header(FILE *cur_file, char *buffer, clf_headers *header){ tokenset *cur_tokenset; char *temp_str; initialize_clf_header(header); do { ReadFileLine(buffer, 1024, cur_file); /* Rprintf("%s\n",buffer); */ if (IsHeaderLine(buffer)){ cur_tokenset = tokenize(&buffer[2],"=\r\n"); /* hopefully token 0 is Key and token 1 is Value */ /* Rprintf("Key is: %s\n",get_token(cur_tokenset,0)); Rprintf("Value is: %s\n",get_token(cur_tokenset,1)); */ /* Decode the Key/Value pair */ if (strcmp(get_token(cur_tokenset,0),"chip_type") == 0){ if (header->n_chip_type == 0){ header->chip_type = Calloc(1, char *); } else { header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *); } temp_str = Calloc(strlen(get_token(cur_tokenset,1))+1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->chip_type[header->n_chip_type] = temp_str; header->n_chip_type++; } else if (strcmp(get_token(cur_tokenset,0), "lib_set_name") == 0){ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->lib_set_name = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "lib_set_version") == 0){ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->lib_set_version = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "clf_format_version") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); 
strcpy(temp_str,get_token(cur_tokenset,1)); header->clf_format_version = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "rows") == 0) { header->rows = atoi(get_token(cur_tokenset,1)); } else if (strcmp(get_token(cur_tokenset,0), "cols") == 0) { header->cols = atoi(get_token(cur_tokenset,1)); } else if (strcmp(get_token(cur_tokenset,0), "header0") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->header0_str = temp_str; header->header0 = Calloc(1,header_0); determine_order_header0(header->header0_str,header->header0); } else if (strcmp(get_token(cur_tokenset,0), "create_date") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->create_date = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "order") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->order = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "sequential") == 0) { header->sequential = atoi(get_token(cur_tokenset,1)); } else if (strcmp(get_token(cur_tokenset,0), "guid") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->guid = temp_str; } else { /* not one of the recognised header types */ if ( header->n_other_headers == 0){ header->other_headers_keys = Calloc(1, char *); header->other_headers_values = Calloc(1, char *); } else { header->other_headers_keys = Realloc(header->other_headers_keys,header->n_other_headers+1, char *); header->other_headers_values = Realloc(header->other_headers_values,header->n_other_headers+1, char *); header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *); } temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->other_headers_values[header->n_other_headers] = temp_str; temp_str = 
Calloc(strlen(get_token(cur_tokenset,0)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,0)); header->other_headers_keys[header->n_other_headers] = temp_str; header->n_other_headers++; } delete_tokens(cur_tokenset); } } while (IsHeaderLine(buffer)); } /**************************************************************** ** ** void read_clf_data(FILE *cur_file, char *buffer, clf_data *data, clf_headers *header) ** ** Read in the data part of the file. Specifically, the x,y, probe_id section. ** Note to save space only the probe_id are stored. ** ****************************************************************/ void read_clf_data(FILE *cur_file, char *buffer, clf_data *data, clf_headers *header){ tokenset *cur_tokenset; int x, y, cur_id; /* Check to see if the header information includes enough to know that probe_ids are deterministic */ /* if the are deterministic then don't need to read the rest of the file */ if (header->sequential > -1){ data->probe_id = NULL; return; } else { data->probe_id = Calloc((header->rows)*(header->cols), int); cur_tokenset = tokenize(buffer,"\t\r\n"); cur_id = atoi(get_token(cur_tokenset,header->header0->probe_id)); x = atoi(get_token(cur_tokenset,header->header0->x)); y = atoi(get_token(cur_tokenset,header->header0->y)); data->probe_id[y*header->cols + x] = cur_id; delete_tokens(cur_tokenset); while(ReadFileLine(buffer, 1024, cur_file)){ cur_tokenset = tokenize(buffer,"\t\r\n"); cur_id = atoi(get_token(cur_tokenset,header->header0->probe_id)); x = atoi(get_token(cur_tokenset,header->header0->x)); y = atoi(get_token(cur_tokenset,header->header0->y)); data->probe_id[y*header->cols + x] = cur_id; delete_tokens(cur_tokenset); } } } /**************************************************************** **************************************************************** ** ** Code for deallocating or initializing header data structures ** **************************************************************** 
****************************************************************/ void dealloc_clf_headers(clf_headers *header){ int i; if (header->n_chip_type > 0){ for (i = 0; i < header->n_chip_type; i++){ Free(header->chip_type[i]); } Free(header->chip_type); } if (header->lib_set_name != NULL){ Free(header->lib_set_name); } if (header->lib_set_version != NULL){ Free(header->lib_set_version); } if (header->clf_format_version != NULL){ Free(header->clf_format_version); } if (header->header0_str != NULL){ Free(header->header0_str); Free(header->header0); } if (header->order != NULL){ Free(header->order); } if (header->create_date != NULL){ Free(header->create_date); } if (header->guid != NULL){ Free(header->guid); } if (header->n_other_headers > 0){ for (i = 0; i < header->n_other_headers; i++){ Free(header->other_headers_keys[i]); Free(header->other_headers_values[i]); } Free(header->other_headers_keys); Free(header->other_headers_values); } } void dealloc_clf_data(clf_data *data){ if (data->probe_id != NULL){ Free(data->probe_id); } } void dealloc_clf_file(clf_file* my_clf){ if (my_clf->headers != NULL){ dealloc_clf_headers(my_clf->headers); Free(my_clf->headers); } if (my_clf->data !=NULL){ dealloc_clf_data(my_clf->data); Free(my_clf->data); } } /********************************************************************** *** *** A function for getting the probe_id for a given x,y *** *** *********************************************************************/ void clf_get_probe_id(clf_file *clf, int *probe_id, int x, int y){ if (clf->headers->sequential > -1){ /* Check if order is "col_major" or "row_major" */ if (strcmp(clf->headers->order,"col_major") == 0){ *probe_id = y*clf->headers->cols + x + clf->headers->sequential; } else if (strcmp(clf->headers->order,"row_major") == 0){ *probe_id = x*clf->headers->rows + y + clf->headers->sequential; } else { *probe_id = -1; /* ie missing */ } } else { *probe_id = clf->data->probe_id[y*clf->headers->rows + x]; } } 
/********************************************************************** *** *** A function for getting the x , y for a given probe_id *** *** *********************************************************************/ void clf_get_x_y(clf_file *clf, int probe_id, int *x, int *y){ int ind; if (clf->headers->sequential > -1){ /* Check if order is "col_major" or "row_major" */ if (strcmp(clf->headers->order,"col_major") == 0){ ind = (probe_id - clf->headers->sequential); *x = ind%clf->headers->cols; *y = ind/clf->headers->cols; } else if (strcmp(clf->headers->order,"row_major") == 0){ ind = (probe_id - clf->headers->sequential); *x = ind/clf->headers->rows; *y = ind%clf->headers->rows; } else { *x = -1; /* ie missing */ *y = -1; } } else { /* Linear Search (this should be improved for routine use) */ ind = 0; while (ind < (clf->headers->cols*clf->headers->rows)){ if (clf->data->probe_id[ind] == probe_id){ break; } ind++; } if (ind == (clf->headers->cols*clf->headers->rows)){ *x = -1; *y = -1; } else { *x = ind/clf->headers->rows; *y = ind%clf->headers->rows; } } } /* * Note this function is only for testing purposes. It provides no methodology for accessing anything * stored in the CLF file in R. * */ void read_clf_file(char **filename){ FILE *cur_file; clf_file my_clf; char *buffer = Calloc(1024, char); cur_file = open_clf_file(filename[0]); my_clf.headers = Calloc(1, clf_headers); my_clf.data = Calloc(1, clf_data); read_clf_header(cur_file,buffer,my_clf.headers); if (validate_clf_header(my_clf.headers)) read_clf_data(cur_file, buffer, my_clf.data, my_clf.headers); Free(buffer); dealloc_clf_file(&my_clf); fclose(cur_file); } affyio/src/read_generic.c0000644000175400017540000014733613556116171016443 0ustar00biocbuildbiocbuild/************************************************************* ** ** file: read_generic.c ** ** Written by B. M. 
Bolstad ** ** Aim is to read in Affymetrix files in the ** "Command Console Generic Data" File Format ** This format is sometimes known as the Calvin format ** ** As with other file format functionality in affyio ** gzipped files are accepted. ** ** The implementation here is based upon openly available ** file format information. The code here is not dependent or based ** in anyway on that in the Fusion SDK. ** ** ** History ** Aug 25, 2007 - Initial version ** Sep 9, 2007 - fix some compiler warnings. ** Oct 25, 2007 - fix error in decode_UINT8_t ** Jan 28, 2008 - fix read_generic_data_group/gzread_generic_data_group. Change bitwise OR (|) to logical OR (||) ** Feb 11, 2008 - add #include for inttypes.h in situations that stdint.h might not exist ** Feb 13, 2008 - add decode_MIME_value_toASCII which takes any MIME and attempts to convert to a string ** Jul 29, 2008 - fix preprocessor directive error for WORDS_BIGENDIAN systems ** Jan 15, 2008 - Fix VECTOR_ELT/STRING_ELT issues ** Feb, 2011 - Some debugging code for checking Generic file format parsing ** Nov, 2011 - Some additional fixed to deal with fixed width fields for strings in dataset rows ** Sept 4, 2017 - change gzFile * to gzFile ** *************************************************************/ #include #include #include #include #ifdef HAVE_STDINT_H #include #elif HAVE_INTTYPES_H #include #endif #include #include #include #include "fread_functions.h" #include "read_generic.h" static void Free_ASTRING(ASTRING *string){ Free(string->value); string->len =0; } static void Free_AWSTRING(AWSTRING *string){ Free(string->value); string->len =0; } static void Free_nvt_triplet(nvt_triplet *triplet){ Free_AWSTRING(&(triplet->name)); Free_ASTRING(&(triplet->value)); Free_AWSTRING(&(triplet->type)); } static void Free_nvts_triplet(col_nvts_triplet *triplet){ Free_AWSTRING(&(triplet->name)); } void Free_generic_data_header(generic_data_header *header){ int i; generic_data_header *temp; 
Free_ASTRING(&(header->data_type_id)); Free_ASTRING(&(header->unique_file_id)); Free_AWSTRING(&(header->Date_time)); Free_AWSTRING(&(header->locale)); for (i =0; i < header->n_name_type_value; i++){ Free_nvt_triplet(&(header->name_type_value[i])); } Free(header->name_type_value); for (i=0; i < (header->n_parent_headers); i++){ temp = (generic_data_header *)header->parent_headers[i]; Free_generic_data_header(temp); Free(temp); } if (header->parent_headers != 0) Free(header->parent_headers); } void Free_generic_data_group(generic_data_group *data_group){ Free_AWSTRING(&(data_group->data_group_name)); } void Free_generic_data_set(generic_data_set *data_set){ int j,i; for (j= 0; j < data_set->ncols; j++){ if (data_set->col_name_type_value[j].type == 7){ for (i=0; i < data_set->nrows; i++){ /* ASTRING */ Free_ASTRING(&((ASTRING *)data_set->Data[j])[i]); } } else if (data_set->col_name_type_value[j].type == 8){ for (i=0; i < data_set->nrows; i++){ /* AWSTRING */ Free_AWSTRING(&((AWSTRING *)data_set->Data[j])[i]); } } Free(data_set->Data[j]); } Free(data_set->Data); for (j=0; j < data_set->ncols; j++){ Free_nvts_triplet(&(data_set->col_name_type_value[j])); } Free(data_set->col_name_type_value); for (j =0; j < data_set->n_name_type_value; j++){ Free_nvt_triplet(&(data_set->name_type_value[j])); } Free(data_set->name_type_value); Free_AWSTRING(&(data_set->data_set_name)); } static int fread_ASTRING(ASTRING *destination, FILE *instream){ fread_be_int32(&(destination->len),1,instream); if (destination->len > 0){ destination->value = Calloc(destination->len+1,char); fread_be_char(destination->value,destination->len,instream); } else { destination->value = 0; } return 1; } static int fread_ASTRING_fw(ASTRING *destination, FILE *instream, int length){ fread_be_int32(&(destination->len),1,instream); if (destination->len > 0){ destination->value = Calloc(destination->len+1,char); fread_be_char(destination->value,destination->len,instream); if (length > destination->len){ 
fseek(instream, length-destination->len, SEEK_CUR); } } else { destination->value = 0; } return 1; } static int fread_AWSTRING(AWSTRING *destination, FILE *instream){ uint16_t temp; /* Affy file wchar_t are 16 bits, the platform may have 32 bit wchar_t (notatbly linux) */ int i; fread_be_int32(&(destination->len),1,instream); if ((destination->len) > 0){ destination->value = Calloc(destination->len+1,wchar_t); for (i=0; i < destination->len; i++){ fread_be_uint16(&temp,1,instream); destination->value[i] = (wchar_t)temp; } } else { destination->value = 0; } return 1; } static int fread_AWSTRING_fw(AWSTRING *destination, FILE *instream, int length){ uint16_t temp; /* Affy file wchar_t are 16 bits, the platform may have 32 bit wchar_t (notatbly linux) */ int i; fread_be_int32(&(destination->len),1,instream); if ((destination->len) > 0){ destination->value = Calloc(destination->len+1,wchar_t); for (i=0; i < destination->len; i++){ fread_be_uint16(&temp,1,instream); destination->value[i] = (wchar_t)temp; } if (length > 2*destination->len){ fseek(instream, length-2*destination->len, SEEK_CUR); } } else { destination->value = 0; } return 1; } static int fread_nvt_triplet(nvt_triplet *destination, FILE *instream){ if (!(fread_AWSTRING(&(destination->name),instream)) || !(fread_ASTRING(&(destination->value),instream)) || !fread_AWSTRING(&(destination->type),instream)){ return 0; } return 1; } static int fread_nvts_triplet(col_nvts_triplet *destination, FILE *instream){ if (!(fread_AWSTRING(&(destination->name),instream)) || !(fread_be_uchar(&(destination->type), 1, instream)) || !(fread_be_int32(&(destination->size), 1, instream))){ return 0; } return 1; } /* The Value is MIME text/ASCII */ static char *decode_ASCII(ASTRING value){ char *return_value; return_value = Calloc(value.len+1,char); memcpy(return_value, value.value, value.len); return return_value; } /* The value is MIME text/plain which means wchar (16bit) string */ static wchar_t *decode_TEXT(ASTRING value){ int 
i; uint32_t len = value.len/ sizeof(uint16_t); wchar_t* return_value = Calloc(len+1,wchar_t); ASTRING temp; uint16_t *contents; temp.len = value.len; temp.value = Calloc(value.len, char); memcpy(temp.value, value.value,value.len); contents = (uint16_t *)temp.value; for (i=0; i < len; i++){ #ifndef WORDS_BIGENDIAN contents[i]=(((contents[i]>>8)&0xff) | ((contents[i]&0xff)<<8)); #endif return_value[i] = contents[i]; } Free_ASTRING(&temp); return return_value; } static int8_t decode_INT8_t(ASTRING value){ int32_t contents; memcpy(&contents,value.value, sizeof(int32_t)); #ifndef WORDS_BIGENDIAN contents=(((contents>>24)&0xff)); #endif return (int8_t)contents; } static uint8_t decode_UINT8_t(ASTRING value){ uint32_t contents; memcpy(&contents,value.value, sizeof(uint32_t)); #ifndef WORDS_BIGENDIAN contents=(((contents>>24)&0xff)); #endif return (uint8_t)contents; } static int16_t decode_INT16_t(ASTRING value){ int32_t contents; memcpy(&contents,value.value, sizeof(int32_t)); #ifndef WORDS_BIGENDIAN contents=(((contents>>24)&0xff) | ((contents>>8)&0xff00)); #endif return (int16_t)contents; } static uint16_t decode_UINT16_t(ASTRING value){ uint32_t contents; memcpy(&contents,value.value, sizeof(uint32_t)); #ifndef WORDS_BIGENDIAN contents=(((contents>>24)&0xff) | ((contents>>8)&0xff00)); #endif return (uint16_t)contents; } static int32_t decode_INT32_t(ASTRING value){ int32_t contents; memcpy(&contents,value.value, sizeof(int32_t)); #ifndef WORDS_BIGENDIAN contents=(((contents>>24)&0xff) | ((contents&0xff)<<24) | ((contents>>8)&0xff00) | ((contents&0xff00)<<8)); #endif return contents; } static int32_t decode_UINT32_t(ASTRING value){ uint32_t contents; memcpy(&contents,value.value, sizeof(uint32_t)); #ifndef WORDS_BIGENDIAN contents=(((contents>>24)&0xff) | ((contents&0xff)<<24) | ((contents>>8)&0xff00) | ((contents&0xff00)<<8)); #endif return contents; } static float decode_float32(ASTRING value){ uint32_t contents; float returnvalue; memcpy(&contents,value.value, 
sizeof(uint32_t)); #ifndef WORDS_BIGENDIAN contents=(((contents>>24)&0xff) | ((contents&0xff)<<24) | ((contents>>8)&0xff00) | ((contents&0xff00)<<8)); #endif memcpy(&returnvalue,&contents, sizeof(uint32_t)); return returnvalue; } AffyMIMEtypes determine_MIMETYPE(nvt_triplet triplet){ if (!wcscmp(triplet.type.value,L"text/x-calvin-float")){ return FLOAT32; } if (!wcscmp(triplet.type.value,L"text/plain")){ return PLAINTEXT; } if (!wcscmp(triplet.type.value,L"text/ascii")){ return ASCIITEXT; } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-32")){ return INT32; } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-16")){ return INT16; } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-32")){ return UINT32; } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-16")){ return INT16; } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-8")){ return INT8; } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-8")){ return UINT8; } Rprintf("read_generic.c: Unknown MIME type encountered\n"); return FLOAT32; } void *decode_MIME_value(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size){ char *temp; wchar_t *temp2; if (mimetype == ASCIITEXT){ temp = decode_ASCII(triplet.value); *size = strlen(temp); result = temp; return temp; } if (mimetype == PLAINTEXT){ temp2 = decode_TEXT(triplet.value); *size = wcslen(temp2); result = temp2; return temp2; } if (mimetype == UINT8){ *size =1; *(uint8_t *)result = decode_UINT8_t(triplet.value); } if (mimetype == INT8){ *size =1; *(int8_t *)result = decode_INT8_t(triplet.value); } if (mimetype == UINT16){ *size =1; *(uint16_t *)result = decode_UINT16_t(triplet.value); } if (mimetype == INT16){ *size =1; *(int16_t *)result = decode_INT16_t(triplet.value); } if (mimetype == UINT32){ *size =1; *(uint32_t *)result = decode_UINT32_t(triplet.value); } if (mimetype == INT32){ *size =1; *(int32_t *)result = decode_INT32_t(triplet.value); } if (mimetype == FLOAT32){ *size =1; *(float 
*)result = decode_float32(triplet.value); } return 0; } char *decode_MIME_value_toASCII(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size){ char *temp; wchar_t *temp2; float temp_float; uint8_t temp_uint8; uint16_t temp_uint16; uint32_t temp_uint32; int8_t temp_int8; int16_t temp_int16; int32_t temp_int32; if (mimetype == ASCIITEXT){ temp = decode_ASCII(triplet.value); *size = strlen(temp); result = temp; return temp; } if (mimetype == PLAINTEXT){ temp2 = decode_TEXT(triplet.value); temp = Calloc(triplet.value.len/2 +1, char); wcstombs(temp,temp2,triplet.value.len/2 + 1); *size = strlen(temp); result = temp; return temp; } /* 64 here is a bit hackish */ temp = Calloc(64,char); if (mimetype == UINT8){ temp_uint8 = decode_UINT8_t(triplet.value); sprintf(temp,"%u",temp_uint8); *size = strlen(temp); result = temp; return temp; } if (mimetype == INT8){ temp_int8 = decode_INT8_t(triplet.value); sprintf(temp,"%d",temp_int8); *size = strlen(temp); result = temp; return temp; } if (mimetype == UINT16){ temp_uint16 = decode_UINT16_t(triplet.value); sprintf(temp,"%u",temp_uint16); *size = strlen(temp); result = temp; return temp; } if (mimetype == INT16){ temp_int16 = decode_INT16_t(triplet.value); sprintf(temp,"%d",temp_int16); *size = strlen(temp); result = temp; return temp; } if (mimetype == UINT32){ temp_uint32 = decode_UINT32_t(triplet.value); sprintf(temp,"%u",temp_uint32); *size = strlen(temp); result = temp; return temp; } if (mimetype == INT32){ temp_int32 = decode_INT32_t(triplet.value); sprintf(temp,"%d",temp_int32); *size = strlen(temp); result = temp; return temp; } if (mimetype == FLOAT32){ temp_float = decode_float32(triplet.value); sprintf(temp,"%f",temp_float); *size = strlen(temp); result = temp; return temp; } return 0; } nvt_triplet* find_nvt(generic_data_header *data_header,char *name){ nvt_triplet* returnvalue = 0; wchar_t *wname; int i; int len = strlen(name); wname = Calloc(len+1, wchar_t); mbstowcs(wname, name, len); for (i =0; i < 
data_header->n_name_type_value; i++){ if (wcscmp(wname, data_header->name_type_value[i].name.value) == 0){ returnvalue = &(data_header->name_type_value[i]); break; } } if (returnvalue == 0){ for (i =0; i < data_header->n_parent_headers; i++){ returnvalue = find_nvt((generic_data_header *)(data_header->parent_headers)[i],name); if (returnvalue !=0){ break; } } } Free(wname); return returnvalue; } int read_generic_file_header(generic_file_header* file_header, FILE *instream){ if (!fread_be_uchar(&(file_header->magic_number),1,instream)){ return 0; } if (file_header->magic_number != 59){ return 0; } if (!fread_be_uchar(&(file_header->version),1,instream)){ return 0; } if (file_header->version != 1){ return 0; } if (!fread_be_int32(&(file_header->n_data_groups),1,instream) || !fread_be_uint32(&(file_header->first_group_file_pos),1,instream)){ return 0; } return 1; } int read_generic_data_header(generic_data_header *data_header, FILE *instream){ int i; generic_data_header *temp_header; if (!fread_ASTRING(&(data_header->data_type_id), instream) || !fread_ASTRING(&(data_header->unique_file_id), instream) || !fread_AWSTRING(&(data_header->Date_time), instream) || !fread_AWSTRING(&(data_header->locale),instream)){ return 0; } if (!fread_be_int32(&(data_header->n_name_type_value),1,instream)){ return 0; } data_header->name_type_value = Calloc(data_header->n_name_type_value, nvt_triplet); for (i =0; i < data_header->n_name_type_value; i++){ if (!fread_nvt_triplet(&data_header->name_type_value[i],instream)){ return 0; } } if (!fread_be_int32(&(data_header->n_parent_headers),1,instream)){ return 0; } if (data_header->n_parent_headers > 0){ data_header->parent_headers = Calloc(data_header->n_parent_headers,void *); } else { data_header->parent_headers = 0; } for (i =0; i < data_header->n_parent_headers; i++){ temp_header = (generic_data_header *)Calloc(1,generic_data_header); if (!read_generic_data_header(temp_header,instream)){ return 0; } data_header->parent_headers[i] = 
temp_header; } return 1; } int read_generic_data_group(generic_data_group *data_group, FILE *instream){ if (!fread_be_uint32(&(data_group->file_position_nextgroup),1,instream) || !fread_be_uint32(&(data_group->file_position_first_data),1,instream) || !fread_be_int32(&(data_group->n_data_sets),1,instream) || !fread_AWSTRING(&(data_group->data_group_name), instream)){ return 0; } return 1; } int read_generic_data_set(generic_data_set *data_set, FILE *instream){ int i; if (!fread_be_uint32(&(data_set->file_pos_first),1,instream) || !fread_be_uint32(&(data_set->file_pos_last),1,instream) || !fread_AWSTRING(&(data_set->data_set_name), instream) || !fread_be_int32(&(data_set->n_name_type_value),1,instream)){ return 0; } data_set->name_type_value = Calloc(data_set->n_name_type_value, nvt_triplet); for (i =0; i < data_set->n_name_type_value; i++){ if (!fread_nvt_triplet(&data_set->name_type_value[i],instream)){ return 0; } } if (!fread_be_uint32(&(data_set->ncols),1,instream)){ return 0; } data_set->col_name_type_value = Calloc(data_set->ncols,col_nvts_triplet); for (i =0; i < data_set->ncols; i++){ if (!fread_nvts_triplet(&data_set->col_name_type_value[i], instream)){ return 0; } } if (!fread_be_uint32(&(data_set->nrows),1,instream)){ return 0; } data_set->Data = Calloc(data_set->ncols, void *); for (i=0; i < data_set->ncols; i++){ switch(data_set->col_name_type_value[i].type){ case 0: data_set->Data[i] = Calloc(data_set->nrows,char); break; case 1: data_set->Data[i] = Calloc(data_set->nrows,unsigned char); break; case 2: data_set->Data[i] = Calloc(data_set->nrows,short); break; case 3: data_set->Data[i] = Calloc(data_set->nrows,unsigned short); break; case 4: data_set->Data[i] = Calloc(data_set->nrows,int); break; case 5: data_set->Data[i] = Calloc(data_set->nrows,unsigned int); break; case 6: data_set->Data[i] = Calloc(data_set->nrows,float); break; /* case 7: data_set->Data[i] = Calloc(data_set->nrows,double); break; */ case 7: data_set->Data[i] = 
Calloc(data_set->nrows,ASTRING); break; case 8: data_set->Data[i] = Calloc(data_set->nrows,AWSTRING); break; } } return 1; } int read_generic_data_set_rows(generic_data_set *data_set, FILE *instream){ int i,j; for (i=0; i < data_set->nrows; i++){ for (j=0; j < data_set->ncols; j++){ switch(data_set->col_name_type_value[j].type){ case 0: if (!fread_be_char(&((char *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 1: if (!fread_be_uchar(&((unsigned char *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 2: if (!fread_be_int16(&((short *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 3: if (!fread_be_uint16(&((unsigned short *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 4: if (!fread_be_int32(&((int32_t *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 5: if (!fread_be_uint32(&((uint32_t *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 6: if (!fread_be_float32(&((float *)data_set->Data[j])[i],1,instream)){ return 0; } break; /* case 7: if (!fread_be_double64(&((double *)data_set->Data[j])[i],1,instream)){ return 0; } break; */ case 7: if (!fread_ASTRING_fw(&((ASTRING *)data_set->Data[j])[i], instream, data_set->col_name_type_value[j].size-4)){ return 0; } break; case 8: if (!fread_AWSTRING_fw(&((AWSTRING *)data_set->Data[j])[i], instream, data_set->col_name_type_value[j].size-4)){ return 0; }; break; } } } return 1; } /***************************************************************************** ** ** ** Functionality for reading a generic format file which has been gzipped ** ** *****************************************************************************/ static int gzread_ASTRING(ASTRING *destination, gzFile instream){ gzread_be_int32(&(destination->len),1,instream); if (destination->len > 0){ destination->value = Calloc(destination->len+1,char); gzread_be_char(destination->value,destination->len,instream); } else { destination->value = 0; } return 1; } static int gzread_ASTRING_fw(ASTRING 
*destination, gzFile instream, int length){ gzread_be_int32(&(destination->len),1,instream); if (destination->len > 0){ destination->value = Calloc(destination->len+1,char); gzread_be_char(destination->value,destination->len,instream); if (length > destination->len){ gzseek(instream, length-destination->len, SEEK_CUR); } } else { destination->value = 0; } return 1; } static int gzread_AWSTRING(AWSTRING *destination, gzFile instream){ uint16_t temp; /* Affy file wchar_t are 16 bits, the platform may have 32 bit wchar_t (notatbly linux) */ int i; gzread_be_int32(&(destination->len),1,instream); if ((destination->len) > 0){ destination->value = Calloc(destination->len+1,wchar_t); for (i=0; i < destination->len; i++){ gzread_be_uint16(&temp,1,instream); destination->value[i] = (wchar_t)temp; } } else { destination->value = 0; } return 1; } static int gzread_AWSTRING_fw(AWSTRING *destination, gzFile instream, int length){ uint16_t temp; /* Affy file wchar_t are 16 bits, the platform may have 32 bit wchar_t (notatbly linux) */ int i; gzread_be_int32(&(destination->len),1,instream); if ((destination->len) > 0){ destination->value = Calloc(destination->len+1,wchar_t); for (i=0; i < destination->len; i++){ gzread_be_uint16(&temp,1,instream); destination->value[i] = (wchar_t)temp; } if (length > 2*destination->len){ gzseek(instream, length-2*destination->len, SEEK_CUR); } } else { destination->value = 0; } return 1; } static int gzread_nvt_triplet(nvt_triplet *destination, gzFile instream){ if (!(gzread_AWSTRING(&(destination->name),instream)) || !(gzread_ASTRING(&(destination->value),instream)) || !(gzread_AWSTRING(&(destination->type),instream))){ return 0; } return 1; } static int gzread_nvts_triplet(col_nvts_triplet *destination, gzFile instream){ if (!(gzread_AWSTRING(&(destination->name),instream)) || !(gzread_be_uchar(&(destination->type), 1, instream)) || !(gzread_be_int32(&(destination->size), 1, instream))){ return 0; } return 1; } int 
gzread_generic_file_header(generic_file_header* file_header, gzFile instream){ if (!gzread_be_uchar(&(file_header->magic_number),1,instream)){ return 0; } if (file_header->magic_number != 59){ return 0; } if (!gzread_be_uchar(&(file_header->version),1,instream)){ return 0; } if (file_header->version != 1){ return 0; } if (!gzread_be_int32(&(file_header->n_data_groups),1,instream) || !gzread_be_uint32(&(file_header->first_group_file_pos),1,instream)){ return 0; } return 1; } int gzread_generic_data_header(generic_data_header *data_header, gzFile instream){ int i; if (!gzread_ASTRING(&(data_header->data_type_id), instream) || !gzread_ASTRING(&(data_header->unique_file_id), instream) || !gzread_AWSTRING(&(data_header->Date_time), instream) || !gzread_AWSTRING(&(data_header->locale),instream)){ return 0; } if (!gzread_be_int32(&(data_header->n_name_type_value),1,instream)){ return 0; } data_header->name_type_value = Calloc(data_header->n_name_type_value, nvt_triplet); for (i =0; i < data_header->n_name_type_value; i++){ if (!gzread_nvt_triplet(&data_header->name_type_value[i],instream)){ return 0; } } if (!gzread_be_int32(&(data_header->n_parent_headers),1,instream)){ return 0; } data_header->parent_headers = Calloc(data_header->n_parent_headers,void *); for (i =0; i < data_header->n_parent_headers; i++){ data_header->parent_headers[i] = (generic_data_header *)Calloc(1,generic_data_header); if (!gzread_generic_data_header((generic_data_header *)data_header->parent_headers[i],instream)){ return 0; } } return 1; } int gzread_generic_data_group(generic_data_group *data_group, gzFile instream){ if (!gzread_be_uint32(&(data_group->file_position_nextgroup),1,instream) || !gzread_be_uint32(&(data_group->file_position_first_data),1,instream) || !gzread_be_int32(&(data_group->n_data_sets),1,instream) || !gzread_AWSTRING(&(data_group->data_group_name), instream)){ return 0; } return 1; } int gzread_generic_data_set(generic_data_set *data_set, gzFile instream){ int i; if 
(!gzread_be_uint32(&(data_set->file_pos_first),1,instream) || !gzread_be_uint32(&(data_set->file_pos_last),1,instream) || !gzread_AWSTRING(&(data_set->data_set_name), instream) || !gzread_be_int32(&(data_set->n_name_type_value),1,instream)){ return 0; } data_set->name_type_value = Calloc(data_set->n_name_type_value, nvt_triplet); for (i =0; i < data_set->n_name_type_value; i++){ if (!gzread_nvt_triplet(&data_set->name_type_value[i],instream)){ return 0; } } if (!gzread_be_uint32(&(data_set->ncols),1,instream)){ return 0; } data_set->col_name_type_value = Calloc(data_set->ncols,col_nvts_triplet); for (i =0; i < data_set->ncols; i++){ if (!gzread_nvts_triplet(&data_set->col_name_type_value[i], instream)){ return 0; } } if (!gzread_be_uint32(&(data_set->nrows),1,instream)){ return 0; } data_set->Data = Calloc(data_set->ncols, void *); for (i=0; i < data_set->ncols; i++){ switch(data_set->col_name_type_value[i].type){ case 0: data_set->Data[i] = Calloc(data_set->nrows,char); break; case 1: data_set->Data[i] = Calloc(data_set->nrows,unsigned char); break; case 2: data_set->Data[i] = Calloc(data_set->nrows,short); break; case 3: data_set->Data[i] = Calloc(data_set->nrows,unsigned short); break; case 4: data_set->Data[i] = Calloc(data_set->nrows,int); break; case 5: data_set->Data[i] = Calloc(data_set->nrows,unsigned int); break; case 6: data_set->Data[i] = Calloc(data_set->nrows,float); break; /* case 7: data_set->Data[i] = Calloc(data_set->nrows,double); break; */ case 7: data_set->Data[i] = Calloc(data_set->nrows,ASTRING); break; case 8: data_set->Data[i] = Calloc(data_set->nrows,AWSTRING); break; } } return 1; } int gzread_generic_data_set_rows(generic_data_set *data_set, gzFile instream){ int i,j; for (i=0; i < data_set->nrows; i++){ for (j=0; j < data_set->ncols; j++){ switch(data_set->col_name_type_value[j].type){ case 0: if (!gzread_be_char(&((char *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 1: if (!gzread_be_uchar(&((unsigned char 
*)data_set->Data[j])[i],1,instream)){ return 0; } break; case 2: if (!gzread_be_int16(&((short *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 3: if (!gzread_be_uint16(&((unsigned short *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 4: if (!gzread_be_int32(&((int32_t *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 5: if (!gzread_be_uint32(&((uint32_t *)data_set->Data[j])[i],1,instream)){ return 0; } break; case 6: if (!gzread_be_float32(&((float *)data_set->Data[j])[i],1,instream)){ return 0; } break; /* case 7: if (!gzread_be_double64(&((double *)data_set->Data[j])[i],1,instream)){ return 0; } break; */ case 7: if (!gzread_ASTRING_fw(&((ASTRING *)data_set->Data[j])[i], instream,data_set->col_name_type_value[j].size-4)){ return 0; } break; case 8: if (!gzread_AWSTRING_fw(&((AWSTRING *)data_set->Data[j])[i], instream, data_set->col_name_type_value[j].size-4)){ return 0; }; break; } } } return 1; } /***************************************************************************** ** ** TESTING FUNCTIONS ** ** ** ** ** The following functions are for testing purposes only they print contents ** of the generic format file to the screen ** ******************************************************************************/ static void print_file_header(generic_file_header header){ Rprintf("Magic Number: %d\n",header.magic_number); Rprintf("Header Version: %d\n",header.version); Rprintf("Number of DataGroups: %d\n",header.n_data_groups); Rprintf("FirstGroup Position: %d\n",header.first_group_file_pos); } static void print_ASTRING(ASTRING string){ if (string.len > 0){ Rprintf("%s",string.value); } } static void print_AWSTRING(AWSTRING string){ if (string.len > 0){ char *temp = Calloc(string.len+1,char); wcstombs(temp, string.value, string.len); Rprintf("%s",temp); Free(temp); } } static void print_decode_nvt_triplet(nvt_triplet triplet){ wchar_t *temp; char *temp2; // char Buffer[10000]; int size; int temp32; float tempfloat; // 
Rprintf("Size is %d\n",triplet.value.len); if (!wcscmp(triplet.type.value,L"text/x-calvin-float")){ Rprintf("Its a float value is %f\n",decode_float32(triplet.value)); Rprintf("Now Trying it again. But using exposed function\n"); decode_MIME_value(triplet, determine_MIMETYPE(triplet),&tempfloat,&size); Rprintf("Its a float value is %f\n",temp32); } if (!wcscmp(triplet.type.value,L"text/ascii")){ temp2 = decode_ASCII(triplet.value); Rprintf("Its a Ascii String value is %s\n",temp2); Free(temp2); Rprintf("Now Trying it again. But using exposed function\n"); temp2 = decode_MIME_value(triplet, determine_MIMETYPE(triplet),temp2,&size); Rprintf("Its a Ascii String value is %s with size %d\n",temp2, size); Free(temp2); } if (!wcscmp(triplet.type.value,L"text/plain")){ temp = decode_TEXT(triplet.value); temp2 = Calloc(triplet.value.len/2 +1, char); wcstombs(temp2,temp,triplet.value.len/2 + 1); Rprintf("Text/plain String is %s\n",temp2); Free(temp); Free(temp2); Rprintf("Now Trying it again. But using exposed function\n"); temp = (wchar_t *)decode_MIME_value(triplet, determine_MIMETYPE(triplet),temp,&size); temp2 = Calloc(size +1, char); wcstombs(temp2,temp,size); Rprintf("Its a Text/plain string value is %s with size %d\n",temp2, size); Free(temp2); Free(temp); } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-32")){ Rprintf("Its a int32_t value is %d\n",decode_INT32_t(triplet.value)); Rprintf("Now Trying it again. 
But using exposed function\n"); decode_MIME_value(triplet, determine_MIMETYPE(triplet),&temp32,&size); Rprintf("Its a int32_t value is %d\n",temp32); } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-16")){ Rprintf("Its a int16_t value is %d\n",decode_INT16_t(triplet.value)); } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-32")){ Rprintf("Its a uint32_t value is %d\n",decode_UINT32_t(triplet.value)); } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-16")){ Rprintf("Its a uint16_t value is %d\n",decode_UINT16_t(triplet.value)); } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-8")){ Rprintf("Its a int8_t value is %d\n",decode_INT8_t(triplet.value)); } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-8")){ Rprintf("Its a uint8_t value is %d\n",decode_UINT8_t(triplet.value)); } } static void print_nvt_triplet(nvt_triplet triplet){ print_AWSTRING(triplet.name); Rprintf(" "); // print_ASTRING(triplet.value); //Rprintf(" "); print_AWSTRING(triplet.type); //Rprintf("\n"); print_decode_nvt_triplet(triplet); } static void print_col_nvts_triplet(col_nvts_triplet triplet){ print_AWSTRING(triplet.name); Rprintf(" %d %d", triplet.type, triplet.size); Rprintf("\n"); } static void print_generic_header(generic_data_header header){ int i; print_ASTRING(header.data_type_id); Rprintf("\n"); print_ASTRING(header.unique_file_id); Rprintf("\n"); print_AWSTRING(header.Date_time); Rprintf("\n"); print_AWSTRING(header.locale); Rprintf("\n"); Rprintf("%d\n", header.n_name_type_value); for (i=0; i < header.n_name_type_value; i++){ print_nvt_triplet(header.name_type_value[i]); } Rprintf("%d\n",header.n_parent_headers); if (header.n_parent_headers > 0){ Rprintf("Printing Parental Headers\n"); for (i =0; i < header.n_parent_headers; i++){ print_generic_header(*(generic_data_header *)header.parent_headers[i]); } } } static void print_generic_data_group(generic_data_group data_group){ 
Rprintf("%d\n",data_group.file_position_nextgroup); Rprintf("%d\n",data_group.file_position_first_data); Rprintf("%d\n",data_group.n_data_sets); Rprintf("Data Group Name is : "); print_AWSTRING(data_group.data_group_name); Rprintf("\n"); } static void print_generic_data_set(generic_data_set data_set){ int i; Rprintf("%d\n%d\n",data_set.file_pos_first,data_set.file_pos_last); print_AWSTRING(data_set.data_set_name); Rprintf("\n"); Rprintf("%d\n",data_set.n_name_type_value); for (i=0; i < data_set.n_name_type_value; i++){ print_nvt_triplet(data_set.name_type_value[i]); } Rprintf("%d\n",data_set.ncols); for (i=0; i < data_set.ncols; i++){ print_col_nvts_triplet(data_set.col_name_type_value[i]); } Rprintf("%d\n",data_set.nrows); } SEXP Read_Generic(SEXP filename){ int i,j,k; SEXP return_value = R_NilValue; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; const char *cur_file_name = CHAR(STRING_ELT(filename,0)); /* Pass through all the header information */ if ((infile = fopen(cur_file_name, "rb")) == NULL) { error("Unable to open the file %s\n",cur_file_name); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); Rprintf("========= Printing File Header =========\n"); print_file_header(my_header); Rprintf("========= Printing Generic Header =========\n"); print_generic_header(my_data_header); for (k =0; k < my_header.n_data_groups; k++){ Rprintf("========= Printing Data Group =========\n"); read_generic_data_group(&my_data_group,infile); print_generic_data_group(my_data_group); for (j=0; j < my_data_group.n_data_sets; j++){ read_generic_data_set(&my_data_set,infile); Rprintf("========= Printing Data Set =========\n"); print_generic_data_set(my_data_set); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < 1 ; i++){ //printf("%f\n",((float *)my_data_set.Data[0])[i]); } // Free_generic_data_set(&my_data_set); 
fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); } Free_generic_data_group(&my_data_group); } Free_generic_data_header(&my_data_header); return return_value; } SEXP gzRead_Generic(SEXP filename){ int i,j,k; SEXP return_value = R_NilValue; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; const char *cur_file_name = CHAR(STRING_ELT(filename,0)); /* Pass through all the header information */ if ((infile = gzopen(cur_file_name, "rb")) == NULL) { error("Unable to open the file %s\n",cur_file_name); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); Rprintf("========= Printing File Header =========\n"); print_file_header(my_header); Rprintf("========= Printing Generic Header =========\n"); print_generic_header(my_data_header); for (k =0; k < my_header.n_data_groups; k++){ Rprintf("========= Printing Data Group =========\n"); gzread_generic_data_group(&my_data_group,infile); // read_generic_data_set(&my_data_set,infile); //read_generic_data_set_rows(&my_data_set,infile); print_generic_data_group(my_data_group); for (j=0; j < my_data_group.n_data_sets; j++){ gzread_generic_data_set(&my_data_set,infile); Rprintf("========= Printing Data Set =========\n"); print_generic_data_set(my_data_set); gzread_generic_data_set_rows(&my_data_set,infile); for (i =0; i < 1 ; i++){ //printf("%f\n",((float *)my_data_set.Data[0])[i]); } // Free_generic_data_set(&my_data_set); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); } Free_generic_data_group(&my_data_group); } Free_generic_data_header(&my_data_header); return return_value; } static SEXP file_header_R_List(generic_file_header *my_header){ SEXP return_value, return_names; SEXP tmp_sexp; PROTECT(return_value = allocVector(VECSXP,3)); PROTECT(tmp_sexp= allocVector(INTSXP,1)); INTEGER(tmp_sexp)[0] = 
(int32_t)my_header->magic_number; SET_VECTOR_ELT(return_value,0,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,1)); INTEGER(tmp_sexp)[0] = (int32_t)my_header->version; SET_VECTOR_ELT(return_value,1,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,1)); INTEGER(tmp_sexp)[0] = (int32_t)my_header->n_data_groups; SET_VECTOR_ELT(return_value,2,tmp_sexp); UNPROTECT(1); PROTECT(return_names = allocVector(STRSXP,3)); SET_STRING_ELT(return_names,0,mkChar("MagicNumber")); SET_STRING_ELT(return_names,1,mkChar("Version")); SET_STRING_ELT(return_names,2,mkChar("NumberDataGroups")); setAttrib(return_value, R_NamesSymbol, return_names); UNPROTECT(2); return return_value; } static SEXP decode_nvt_triplet(nvt_triplet triplet){ wchar_t *temp=0; char *temp2=0; int size; int temp32; float tempfloat; SEXP return_value=R_NilValue; if (!wcscmp(triplet.type.value,L"text/x-calvin-float")){ decode_MIME_value(triplet, determine_MIMETYPE(triplet),&tempfloat,&size); PROTECT(return_value=allocVector(REALSXP,1)); NUMERIC_POINTER(return_value)[0] = (double)tempfloat; UNPROTECT(1); return(return_value); } if (!wcscmp(triplet.type.value,L"text/ascii")){ temp2 = decode_MIME_value(triplet, determine_MIMETYPE(triplet),temp2,&size); PROTECT(return_value=allocVector(STRSXP,1)); SET_STRING_ELT(return_value,0,mkChar(temp2)); UNPROTECT(1); Free(temp2); return(return_value); } if (!wcscmp(triplet.type.value,L"text/plain")){ temp = (wchar_t *)decode_MIME_value(triplet, determine_MIMETYPE(triplet),temp,&size); temp2 = Calloc(size +1, char); wcstombs(temp2,temp,size); PROTECT(return_value=allocVector(STRSXP,1)); SET_STRING_ELT(return_value,0,mkChar(temp2)); UNPROTECT(1); Free(temp2); Free(temp); return(return_value); } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-32")){ decode_MIME_value(triplet, determine_MIMETYPE(triplet),&temp32,&size); PROTECT(return_value=allocVector(INTSXP,1)); INTEGER_POINTER(return_value)[0] = (int32_t)temp32; UNPROTECT(1); return(return_value); } 
if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-16")){ PROTECT(return_value=allocVector(INTSXP,1)); INTEGER_POINTER(return_value)[0] = (int32_t)decode_INT16_t(triplet.value); UNPROTECT(1); return(return_value); } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-32")){ PROTECT(return_value=allocVector(INTSXP,1)); INTEGER_POINTER(return_value)[0] = (int32_t)decode_UINT32_t(triplet.value); UNPROTECT(1); return(return_value); } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-16")){ PROTECT(return_value=allocVector(INTSXP,1)); INTEGER_POINTER(return_value)[0] = (int32_t)decode_UINT16_t(triplet.value); UNPROTECT(1); return(return_value); } if (!wcscmp(triplet.type.value,L"text/x-calvin-integer-8")){ PROTECT(return_value=allocVector(INTSXP,1)); INTEGER_POINTER(return_value)[0] = (int32_t)decode_INT8_t(triplet.value); UNPROTECT(1); return(return_value); } if (!wcscmp(triplet.type.value,L"text/x-calvin-unsigned-integer-8")){ PROTECT(return_value=allocVector(INTSXP,1)); INTEGER_POINTER(return_value)[0] = (int32_t)decode_UINT8_t(triplet.value); UNPROTECT(1); return(return_value); } return(return_value); } static SEXP data_header_R_List(generic_data_header *my_data_header){ SEXP return_value, return_names; SEXP tmp_sexp, tmp_names; char *temp; int i; PROTECT(return_value = allocVector(VECSXP,8)); PROTECT(tmp_sexp= allocVector(STRSXP,1)); if (my_data_header->data_type_id.len > 0){ SET_STRING_ELT(tmp_sexp,0,mkChar(my_data_header->data_type_id.value)); } SET_VECTOR_ELT(return_value,0,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(STRSXP,1)); if (my_data_header->unique_file_id.len > 0){ SET_STRING_ELT(tmp_sexp,0,mkChar(my_data_header->unique_file_id.value)); } SET_VECTOR_ELT(return_value,1,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(STRSXP,1)); if (my_data_header->Date_time.len > 0){ temp = Calloc(my_data_header->Date_time.len+1,char); wcstombs(temp, my_data_header->Date_time.value, my_data_header->Date_time.len); 
SET_STRING_ELT(tmp_sexp,0,mkChar(temp)); Free(temp); } SET_VECTOR_ELT(return_value,2,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(STRSXP,1)); if (my_data_header->locale.len > 0){ temp = Calloc(my_data_header->locale.len+1,char); wcstombs(temp, my_data_header->locale.value, my_data_header->locale.len); SET_STRING_ELT(tmp_sexp,0,mkChar(temp)); Free(temp); } SET_VECTOR_ELT(return_value,3,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(INTSXP,1)); INTEGER(tmp_sexp)[0] = (int32_t)my_data_header->n_name_type_value; SET_VECTOR_ELT(return_value,4,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(VECSXP,my_data_header->n_name_type_value)); PROTECT(tmp_names = allocVector(STRSXP,my_data_header->n_name_type_value)); for (i=0; i < my_data_header->n_name_type_value; i++){ SET_VECTOR_ELT(tmp_sexp,i,decode_nvt_triplet(my_data_header->name_type_value[i])); temp = Calloc(my_data_header->name_type_value[i].name.len+1,char); wcstombs(temp, my_data_header->name_type_value[i].name.value, my_data_header->name_type_value[i].name.len); SET_STRING_ELT(tmp_names,i,mkChar(temp)); Free(temp); } setAttrib(tmp_sexp, R_NamesSymbol, tmp_names); SET_VECTOR_ELT(return_value,5,tmp_sexp); UNPROTECT(2); PROTECT(tmp_sexp= allocVector(INTSXP,1)); INTEGER(tmp_sexp)[0] = (int32_t)my_data_header->n_parent_headers; SET_VECTOR_ELT(return_value,6,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(VECSXP,my_data_header->n_parent_headers)); if (my_data_header->n_parent_headers > 0){ for (i =0; i < my_data_header->n_parent_headers; i++){ SET_VECTOR_ELT(tmp_sexp,i,data_header_R_List(my_data_header->parent_headers[i])); } } SET_VECTOR_ELT(return_value,7,tmp_sexp); UNPROTECT(1); PROTECT(return_names = allocVector(STRSXP,8)); SET_STRING_ELT(return_names,0,mkChar("DataTypeID")); SET_STRING_ELT(return_names,1,mkChar("UniqueFileID")); SET_STRING_ELT(return_names,2,mkChar("DateTime")); SET_STRING_ELT(return_names,3,mkChar("Locale")); 
SET_STRING_ELT(return_names,4,mkChar("NumberOfNameValueType")); SET_STRING_ELT(return_names,5,mkChar("NVTList")); SET_STRING_ELT(return_names,6,mkChar("NumberOfParentHeaders")); SET_STRING_ELT(return_names,7,mkChar("ParentHeaders")); setAttrib(return_value, R_NamesSymbol, return_names); UNPROTECT(2); return return_value; } static SEXP data_group_R_list(generic_data_group *my_data_group){ SEXP return_value; SEXP tmp_sexp=R_NilValue, return_names; char *temp; PROTECT(return_value = allocVector(VECSXP,2)); if (my_data_group->data_group_name.len > 0){ PROTECT(tmp_sexp= allocVector(STRSXP,1)); temp = Calloc(my_data_group->data_group_name.len+1,char); wcstombs(temp, my_data_group->data_group_name.value, my_data_group->data_group_name.len); SET_STRING_ELT(tmp_sexp,0,mkChar(temp)); Free(temp); } SET_VECTOR_ELT(return_value,0,tmp_sexp); UNPROTECT(1); SET_VECTOR_ELT(return_value,1,allocVector(VECSXP,my_data_group->n_data_sets)); PROTECT(return_names = allocVector(STRSXP,2)); SET_STRING_ELT(return_names,0,mkChar("Name")); SET_STRING_ELT(return_names,1,mkChar("Datasets")); setAttrib(return_value, R_NamesSymbol, return_names); UNPROTECT(2); return return_value; } static SEXP generic_data_set_R_List(generic_data_set *my_data_set){ SEXP return_value, return_names; SEXP tmp_sexp, tmp_names; int i; char *temp; PROTECT(return_value = allocVector(VECSXP,3)); PROTECT(tmp_sexp= allocVector(STRSXP,1)); if (my_data_set->data_set_name.len > 0){ temp = Calloc(my_data_set->data_set_name.len+1,char); wcstombs(temp, my_data_set->data_set_name.value, my_data_set->data_set_name.len); SET_STRING_ELT(tmp_sexp,0,mkChar(temp)); Free(temp); } SET_VECTOR_ELT(return_value,0,tmp_sexp); UNPROTECT(1); PROTECT(tmp_sexp= allocVector(VECSXP,my_data_set->n_name_type_value)); PROTECT(tmp_names = allocVector(STRSXP,my_data_set->n_name_type_value)); for (i=0; i < my_data_set->n_name_type_value; i++){ //print_nvt_triplet(data_set.name_type_value[i]); 
SET_VECTOR_ELT(tmp_sexp,i,decode_nvt_triplet(my_data_set->name_type_value[i])); temp = Calloc(my_data_set->name_type_value[i].name.len+1,char); wcstombs(temp, my_data_set->name_type_value[i].name.value, my_data_set->name_type_value[i].name.len); SET_STRING_ELT(tmp_names,i,mkChar(temp)); Free(temp); } setAttrib(tmp_sexp, R_NamesSymbol, tmp_names); SET_VECTOR_ELT(return_value,1,tmp_sexp); UNPROTECT(2); PROTECT(tmp_sexp= allocVector(VECSXP,my_data_set->ncols)); SET_VECTOR_ELT(return_value,2,tmp_sexp); PROTECT(tmp_names = allocVector(STRSXP,my_data_set->ncols)); for (i=0; i < my_data_set->ncols; i++){ temp = Calloc(my_data_set->col_name_type_value[i].name.len+1,char); wcstombs(temp, my_data_set->col_name_type_value[i].name.value, my_data_set->col_name_type_value[i].name.len); SET_STRING_ELT(tmp_names,i,mkChar(temp)); Free(temp); } setAttrib(tmp_sexp, R_NamesSymbol, tmp_names); UNPROTECT(2); PROTECT(return_names = allocVector(STRSXP,3)); SET_STRING_ELT(return_names,0,mkChar("Name")); SET_STRING_ELT(return_names,1,mkChar("NVTList")); SET_STRING_ELT(return_names,2,mkChar("DataColumns")); setAttrib(return_value, R_NamesSymbol, return_names); UNPROTECT(2); return return_value; } static SEXP generic_data_set_rows_R_List(generic_data_set *data_set, int col){ SEXP return_value; int i,j; char *temp; j = col; switch(data_set->col_name_type_value[j].type){ case 0: PROTECT(return_value = allocVector(INTSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ INTEGER_POINTER(return_value)[i] = (int32_t)((char *)data_set->Data[j])[i]; } break; case 1: PROTECT(return_value = allocVector(INTSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ INTEGER_POINTER(return_value)[i] = (int32_t)((unsigned char *)data_set->Data[j])[i]; } break; case 2: PROTECT(return_value = allocVector(INTSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ INTEGER_POINTER(return_value)[i] = (int32_t)((short *)data_set->Data[j])[i]; } break; case 3: PROTECT(return_value = 
allocVector(INTSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ INTEGER_POINTER(return_value)[i] = (int32_t)((unsigned short *)data_set->Data[j])[i]; } break; case 4: PROTECT(return_value = allocVector(INTSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ INTEGER_POINTER(return_value)[i] = (int32_t)((int32_t *)data_set->Data[j])[i]; } break; case 5: PROTECT(return_value = allocVector(INTSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ INTEGER_POINTER(return_value)[i] = (int32_t)((uint32_t *)data_set->Data[j])[i]; } break; case 6: PROTECT( return_value = allocVector(REALSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ NUMERIC_POINTER(return_value)[i] = (double)((float *)data_set->Data[j])[i]; } break; /* case 7: PROTECT(return_value = allocVector(REALSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ NUMERIC_POINTER(return_value)[i] = (double)((double *)data_set->Data[j])[i]; } break; */ case 7: PROTECT(return_value = allocVector(STRSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ temp = (char *)((ASTRING *)data_set->Data[j])[i].value; SET_STRING_ELT(return_value,i,mkChar(temp)); } break; case 8: PROTECT(return_value = allocVector(STRSXP, data_set->nrows)); for (i=0; i < data_set->nrows; i++){ temp = Calloc(((AWSTRING *)data_set->Data[j])[i].len+1,char); wcstombs(temp, ((AWSTRING *)data_set->Data[j])[i].value,((AWSTRING *)data_set->Data[j])[i].len); SET_STRING_ELT(return_value,i,mkChar(temp)); Free(temp); } break; } UNPROTECT(1); return return_value; } SEXP Read_Generic_R_List(SEXP filename){ int i,j,k; SEXP return_value = R_NilValue; SEXP return_names; SEXP temp_sxp = R_NilValue,temp_sxp2 = R_NilValue,temp_names = R_NilValue,temp_names2 = R_NilValue; FILE *infile; char *temp; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; const char *cur_file_name = CHAR(STRING_ELT(filename,0)); /* Pass through all the header 
information */ if ((infile = fopen(cur_file_name, "rb")) == NULL) { error("Unable to open the file %s\n",cur_file_name); return 0; } /* Read the two header sections first */ read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); PROTECT(return_value = allocVector(VECSXP,3)); /* File Header is First Element of Return List */ SET_VECTOR_ELT(return_value,0,file_header_R_List(&my_header)); /* Data Header is Second Element of Return List */ SET_VECTOR_ELT(return_value,1,data_header_R_List(&my_data_header)); /* Data Groups are it Third Element of Return List */ /* Now Read Data groups */ PROTECT(temp_sxp = allocVector(VECSXP,my_header.n_data_groups)); SET_VECTOR_ELT(return_value,2,temp_sxp); UNPROTECT(1); PROTECT(temp_names = allocVector(STRSXP,my_header.n_data_groups)); for (k =0; k < my_header.n_data_groups; k++){ read_generic_data_group(&my_data_group,infile); SET_VECTOR_ELT(temp_sxp,k,data_group_R_list(&my_data_group)); temp = Calloc(my_data_group.data_group_name.len+1,char); wcstombs(temp, my_data_group.data_group_name.value, my_data_group.data_group_name.len); SET_STRING_ELT(temp_names,k,mkChar(temp)); Free(temp); PROTECT(temp_names2 = allocVector(STRSXP,my_data_group.n_data_sets)); for (j=0; j < my_data_group.n_data_sets; j++){ read_generic_data_set(&my_data_set,infile); temp_sxp2 = generic_data_set_R_List(&my_data_set); SET_VECTOR_ELT(VECTOR_ELT(VECTOR_ELT(temp_sxp,k),1),j,temp_sxp2); temp = Calloc(my_data_set.data_set_name.len+1,char); wcstombs(temp, my_data_set.data_set_name.value, my_data_set.data_set_name.len); SET_STRING_ELT(temp_names2,j,mkChar(temp)); Free(temp); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.ncols; i++){ SET_VECTOR_ELT(VECTOR_ELT(temp_sxp2,2),i,generic_data_set_rows_R_List(&my_data_set, i)); } fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); } setAttrib(VECTOR_ELT(VECTOR_ELT(temp_sxp,k),1), R_NamesSymbol, temp_names2); 
UNPROTECT(1); Free_generic_data_group(&my_data_group); } Free_generic_data_header(&my_data_header); setAttrib(temp_sxp, R_NamesSymbol, temp_names); UNPROTECT(1); PROTECT(return_names = allocVector(STRSXP,3)); SET_STRING_ELT(return_names,0,mkChar("FileHeader")); SET_STRING_ELT(return_names,1,mkChar("DataHeader")); SET_STRING_ELT(return_names,2,mkChar("DataGroup")); setAttrib(return_value, R_NamesSymbol, return_names); UNPROTECT(2); fclose(infile); return return_value; } affyio/src/read_generic.h0000644000175400017540000000601413556116171016433 0ustar00biocbuildbiocbuild #include #include /****** ****** ****** Data Structures ****** ******/ /* File header */ typedef struct{ uint8_t magic_number; uint8_t version; int32_t n_data_groups; uint32_t first_group_file_pos; } generic_file_header; /* An affy generic STRING */ typedef struct{ int32_t len; char *value; } ASTRING; /* An affy generic WSTRING */ typedef struct{ int32_t len; wchar_t *value; } AWSTRING; /* Name Value Type Triplet */ typedef struct{ AWSTRING name; ASTRING value; AWSTRING type; } nvt_triplet; /* Data Header */ typedef struct generic_data_header *generic_data_header_pointer; typedef struct{ ASTRING data_type_id; /*Stored in file as INT followed by CHAR array */ ASTRING unique_file_id; /*See above */ AWSTRING Date_time; /*Stored in file as INT followed by WCHAR array */ AWSTRING locale; int32_t n_name_type_value; nvt_triplet *name_type_value; int32_t n_parent_headers; void **parent_headers; } generic_data_header; /* Data Group */ typedef struct { uint32_t file_position_nextgroup; uint32_t file_position_first_data; int32_t n_data_sets; AWSTRING data_group_name; } generic_data_group; /* Dataset */ typedef struct { AWSTRING name; uint8_t type; int32_t size; } col_nvts_triplet; typedef struct { uint32_t file_pos_first; uint32_t file_pos_last; AWSTRING data_set_name; int32_t n_name_type_value; nvt_triplet *name_type_value; uint32_t ncols; col_nvts_triplet* col_name_type_value; uint32_t nrows; void **Data; /* 
in the docs this is rows */ } generic_data_set; typedef enum{ ASCIITEXT = 1, PLAINTEXT = 2, UINT8 = 3, INT8= 4, UINT16 = 5, INT16 = 6, UINT32 = 7, INT32 = 8, FLOAT32 = 9 } AffyMIMEtypes; AffyMIMEtypes determine_MIMETYPE(nvt_triplet triplet); void *decode_MIME_value(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size); char *decode_MIME_value_toASCII(nvt_triplet triplet, AffyMIMEtypes mimetype, void *result, int *size); nvt_triplet* find_nvt(generic_data_header *data_header,char *name); int read_generic_file_header(generic_file_header* file_header, FILE *instream); int read_generic_data_header(generic_data_header *data_header, FILE *instream); int read_generic_data_group(generic_data_group *data_group, FILE *instream); int read_generic_data_set(generic_data_set *data_set, FILE *instream); int read_generic_data_set_rows(generic_data_set *data_set, FILE *instream); void Free_generic_data_header(generic_data_header *header); void Free_generic_data_group(generic_data_group *data_group); void Free_generic_data_set(generic_data_set *data_set); int gzread_generic_file_header(generic_file_header* file_header, gzFile instream); int gzread_generic_data_header(generic_data_header *data_header, gzFile instream); int gzread_generic_data_group(generic_data_group *data_group,gzFile instream); int gzread_generic_data_set(generic_data_set *data_set, gzFile instream); int gzread_generic_data_set_rows(generic_data_set *data_set, gzFile instream); affyio/src/read_multichannel_celfile_generic.c0000644000175400017540000007015713556116171022665 0ustar00biocbuildbiocbuild/************************************************************* ** ** file: read_multichannel_celfile_generic.c ** ** Written by B. M. Bolstad ** ** Aim is to read in Affymetrix CEL files in the ** "Command Console Generic Data" File Format ** This format is sometimes known as the Calvin format ** ** As with other file format functionality in affyio ** gzipped files are accepted. 
** ** The implementation here is based upon openly available ** file format information. The code here is not dependent or based ** in anyway on that in the Fusion SDK. ** ** ** History ** Sept 3, 2007 -Initial version ** Sept 9, 2007 - fix compiler warnings ** Oct 11, 2007 - fix missing DatHeader problem ** Feb 11, 2008 - add #include for inttypes.h in situations that stdint.h might not exist ** Feb 13, 2008 - fix problems with generic_get_detailed_header_info(), gzgeneric_get_detailed_header_info() ** May 18, 2009 - Add Ability to extract scan date from CEL file header ** May 25, 2010 - Multichannel CELfile support adapted from single channel parser ** Sep 4, 2017 - change gzFile* to gzFile ** *************************************************************/ #include #include #include #include #ifdef HAVE_STDINT_H #include #elif HAVE_INTTYPES_H #include #endif #include #include #include #include #include "read_generic.h" #include "read_celfile_generic.h" #include "read_multichannel_celfile_generic.h" #include "read_abatch.h"



/*
 * Test whether filename is an uncompressed generic format file whose data
 * type identifier is "affymetrix-calvin-multi-intensity".
 *
 * Returns 1 if so, 0 if either header cannot be read or the identifier
 * differs. Raises an R error if the file cannot be opened.
 */
int isGenericMultiChannelCelFile(const char *filename){

  FILE *infile;
  generic_file_header file_header;
  generic_data_header data_header;

  if ((infile = fopen(filename, "rb")) == NULL){
    error("Unable to open the file %s",filename);
    return 0;
  }

  if (!read_generic_file_header(&file_header,infile)){
    fclose(infile);
    return 0;
  }

  if (!read_generic_data_header(&data_header,infile)){
    Free_generic_data_header(&data_header);
    fclose(infile);
    return 0;
  }

  if (strcmp(data_header.data_type_id.value, "affymetrix-calvin-multi-intensity") !=0){
    Free_generic_data_header(&data_header);
    fclose(infile);
    return 0;
  }
  Free_generic_data_header(&data_header);

  fclose(infile);
  return 1;
}



/*
 * strcmp()-style comparison of a wide string against "Intensity".
 *
 * Returns 0 only when the string equals "Intensity". An empty string now
 * compares as different; previously it returned 0 and was therefore treated
 * as a match by callers.
 */
static int compare_AWSTRING_Intensity(AWSTRING string){

  int rv;
  char *temp;

  if (string.len <= 0){
    return strcmp("","Intensity");
  }

  temp = Calloc(string.len+1,char);
  wcstombs(temp, string.value, string.len);
  rv = strcmp(temp,"Intensity");
  Free(temp);

  return rv;
}



/*
 * Count the channels of a multichannel CEL file: the number of data groups
 * that contain a data set named "Intensity".
 *
 * Groups are visited by following each group's "next group" file position
 * until that position is 0. Raises an R error if the file cannot be opened.
 */
int multichannel_determine_number_channels(const char *filename){

  int j=0;
  int returnvalue = 0;

  FILE *infile;

  generic_file_header my_header;
  generic_data_header my_data_header;
  generic_data_group my_data_group;
  generic_data_set my_data_set;

  uint32_t next_group =1;

  if ((infile = fopen(filename, "rb")) == NULL){
    error("Unable to open the file %s\n",filename);
    return 0;
  }

  read_generic_file_header(&my_header, infile);
  read_generic_data_header(&my_data_header, infile);

  do {
    read_generic_data_group(&my_data_group,infile);
    next_group = my_data_group.file_position_nextgroup;
    for (j=0; j < my_data_group.n_data_sets; j++){
      read_generic_data_set(&my_data_set,infile);
      if (!compare_AWSTRING_Intensity(my_data_set.data_set_name)){
        returnvalue++;
        /* release the data set header before leaving the loop; it was
           previously leaked on this path */
        Free_generic_data_set(&my_data_set);
        break;
      }
      read_generic_data_set_rows(&my_data_set,infile);
      Free_generic_data_set(&my_data_set);
    }
    Free_generic_data_group(&my_data_group);
    fseek(infile,next_group,SEEK_SET);
  } while (next_group > 0);

  fclose(infile);
  Free_generic_data_header(&my_data_header);

  return(returnvalue);
}



char *multichannel_determine_channel_name(const char *filename, int channelindex){ int k=0; char *returnvalue = 0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; uint32_t next_group =1; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); while (k < channelindex){ read_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; fseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } read_generic_data_group(&my_data_group,infile); if (my_data_group.data_group_name.len > 0){ returnvalue = Calloc(my_data_group.data_group_name.len+1,char); wcstombs(returnvalue, my_data_group.data_group_name.value, my_data_group.data_group_name.len); } Free_generic_data_group(&my_data_group); fclose(infile); Free_generic_data_header(&my_data_header);
return(returnvalue); } /*************************************************************** ** ** static int read_binarycel_file_intensities(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows) ** ** ** This function reads binary cel file intensities into the data matrix ** **************************************************************/ int read_genericcel_file_intensities_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex){ int i=0, k=0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; uint32_t next_group =1; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); /* skip merrily through the file (optimise this with file pointers later) */ while (k < channelindex){ read_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; fseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } /* Now the actual channel of intensities */ read_generic_data_group(&my_data_group,infile); read_generic_data_set(&my_data_set,infile); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_group(&my_data_group); fclose(infile); Free_generic_data_header(&my_data_header); return(0); } int read_genericcel_file_stddev_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex){ int i=0, k=0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; 
uint32_t next_group =1; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); /* skip merrily through the file (optimise this with file pointers later) */ while (k < channelindex){ read_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; fseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } read_generic_data_group(&my_data_group,infile); read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); read_generic_data_set(&my_data_set,infile); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); fclose(infile); return(0); } int read_genericcel_file_npixels_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex){ int i=0, k=0; FILE *infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; uint32_t next_group =1; if ((infile = fopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } read_generic_file_header(&my_header, infile); read_generic_data_header(&my_data_header, infile); /* skip merrily through the file (optimise this with file pointers later) */ while (k < channelindex){ read_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; fseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } read_generic_data_group(&my_data_group,infile); read_generic_data_set(&my_data_set,infile); 
fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); read_generic_data_set(&my_data_set,infile); fseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); read_generic_data_set(&my_data_set,infile); read_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((short *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); fclose(infile); return(0); }



/*
 * Extract the outlier and mask coordinates of one channel of an uncompressed
 * multichannel generic format CEL file.
 *
 * filename                : file to read
 * nmasks, noutliers       : receive the number of mask / outlier entries
 * masks_x, masks_y        : receive newly Calloc'ed arrays of *nmasks shorts
 * outliers_x, outliers_y  : receive newly Calloc'ed arrays of *noutliers shorts
 * channelindex            : zero-based index of the data group to read
 *
 * The first three data sets of the group (intensities, stddev, npixels) are
 * skipped; the fourth is read as the outliers and the fifth as the masks,
 * with column 0 taken as x and column 1 as y.
 *
 * Raises an R error if the file cannot be opened.
 */
void generic_get_masks_outliers_multichannel(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y, int channelindex){

  int i=0, k=0;

  FILE *infile;

  generic_file_header my_header;
  generic_data_header my_data_header;
  generic_data_group my_data_group;
  generic_data_set my_data_set;

  uint32_t next_group =1;

  if ((infile = fopen(filename, "rb")) == NULL){
    error("Unable to open the file %s\n",filename);
  }

  read_generic_file_header(&my_header, infile);
  read_generic_data_header(&my_data_header, infile);

  /* skip merrily through the file (optimise this with file pointers later) */
  while (k < channelindex){
    read_generic_data_group(&my_data_group,infile);
    next_group = my_data_group.file_position_nextgroup;
    fseek(infile,next_group,SEEK_SET);
    Free_generic_data_group(&my_data_group);
    k++;
  }
  read_generic_data_group(&my_data_group,infile);

  /* passing the intensities */
  read_generic_data_set(&my_data_set,infile);
  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* passing by the stddev */
  read_generic_data_set(&my_data_set,infile);
  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* passing by the npixels */
  read_generic_data_set(&my_data_set,infile);
  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* Now lets go for the "Outlier" */
  read_generic_data_set(&my_data_set,infile);

  *noutliers = my_data_set.nrows;
  *outliers_x = Calloc(my_data_set.nrows,short);
  *outliers_y = Calloc(my_data_set.nrows,short);

  read_generic_data_set_rows(&my_data_set,infile);

  for (i=0; i < my_data_set.nrows; i++){
    (*outliers_x)[i] = ((short *)my_data_set.Data[0])[i];
    (*outliers_y)[i] = ((short *)my_data_set.Data[1])[i];
  }

  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* Now lets go for the "Mask" */
  read_generic_data_set(&my_data_set,infile);

  *nmasks = my_data_set.nrows;
  *masks_x = Calloc(my_data_set.nrows,short);
  *masks_y = Calloc(my_data_set.nrows,short);

  read_generic_data_set_rows(&my_data_set,infile);

  /* store into the mask arrays; this loop previously wrote into the outlier
     arrays, leaving the masks zeroed and overrunning the outlier buffers
     whenever there were more masks than outliers */
  for (i=0; i < my_data_set.nrows; i++){
    (*masks_x)[i] = ((short *)my_data_set.Data[0])[i];
    (*masks_y)[i] = ((short *)my_data_set.Data[1])[i];
  }

  Free_generic_data_set(&my_data_set);
  Free_generic_data_header(&my_data_header);
  Free_generic_data_group(&my_data_group);

  fclose(infile);
}



/*
 * Set the intensities of outlier and/or masked cells of one channel to NaN.
 *
 * filename     : uncompressed multichannel generic format CEL file
 * intensity    : intensity matrix; entries chip_num*rows + cell are modified
 * chip_num     : column (chip) of the matrix being processed
 * rows         : number of rows of the intensity matrix
 * rm_mask      : non-zero to blank out cells listed in the "Mask" data set
 * rm_outliers  : non-zero to blank out cells listed in the "Outlier" data set
 * channelindex : zero-based index of the data group to read
 *
 * cols and chip_dim_rows are not used. The cell index is computed as
 * x + nrows*y, where nrows is the "affymetrix-cel-rows" header value.
 *
 * Raises an R error if the file cannot be opened or the header does not
 * provide "affymetrix-cel-rows".
 */
void generic_apply_masks_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers, int channelindex){

  int i=0, k=0;
  int cur_index;

  short cur_x, cur_y;

  int nrows;
  int size;

  FILE *infile;

  generic_file_header my_header;
  generic_data_header my_data_header;
  generic_data_group my_data_group;
  generic_data_set my_data_set;
  nvt_triplet *triplet;
  AffyMIMEtypes cur_mime_type;

  uint32_t next_group =1;

  if ((infile = fopen(filename, "rb")) == NULL){
    error("Unable to open the file %s\n",filename);
  }

  read_generic_file_header(&my_header, infile);
  read_generic_data_header(&my_data_header, infile);

  /* move to the requested channel, as the other multichannel readers do;
     channelindex was previously ignored, so the first data group was always
     used. With channelindex == 0 the behaviour is unchanged. */
  while (k < channelindex){
    read_generic_data_group(&my_data_group,infile);
    next_group = my_data_group.file_position_nextgroup;
    fseek(infile,next_group,SEEK_SET);
    Free_generic_data_group(&my_data_group);
    k++;
  }
  read_generic_data_group(&my_data_group,infile);

  triplet =  find_nvt(&my_data_header,"affymetrix-cel-rows");
  if (triplet == NULL){
    /* NOTE(review): assumes find_nvt() reports a missing name as NULL -
       confirm against its definition */
    Free_generic_data_group(&my_data_group);
    Free_generic_data_header(&my_data_header);
    fclose(infile);
    error("Unable to find affymetrix-cel-rows in the header of the file %s\n",filename);
  }
  cur_mime_type = determine_MIMETYPE(*triplet);
  decode_MIME_value(*triplet,cur_mime_type, &nrows, &size);

  /* passing the intensities */
  read_generic_data_set(&my_data_set,infile);
  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* passing by the stddev */
  read_generic_data_set(&my_data_set,infile);
  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* passing by the npixels */
  read_generic_data_set(&my_data_set,infile);
  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* Now lets go for the "Outlier" */
  read_generic_data_set(&my_data_set,infile);
  if (rm_outliers){
    read_generic_data_set_rows(&my_data_set,infile);
    for (i=0; i < my_data_set.nrows; i++){
      cur_x = ((short *)my_data_set.Data[0])[i];
      cur_y = ((short *)my_data_set.Data[1])[i];
      cur_index = (int)cur_x + nrows*(int)cur_y;
      intensity[chip_num*rows + cur_index] =  R_NaN;
    }
  }
  fseek(infile, my_data_set.file_pos_last, SEEK_SET);
  Free_generic_data_set(&my_data_set);

  /* Now lets go for the "Mask" */
  read_generic_data_set(&my_data_set,infile);
  if (rm_mask){
    read_generic_data_set_rows(&my_data_set,infile);
    for (i=0; i < my_data_set.nrows; i++){
      cur_x = ((short *)my_data_set.Data[0])[i];
      cur_y = ((short *)my_data_set.Data[1])[i];
      cur_index = (int)cur_x + nrows*(int)cur_y;
      intensity[chip_num*rows + cur_index] =  R_NaN;
    }
  }
  Free_generic_data_set(&my_data_set);
  Free_generic_data_header(&my_data_header);
  Free_generic_data_group(&my_data_group);

  fclose(infile);
}



/******************************************************************************************************* ******************************************************************************************************* ** ** Code below supports gzipped command console format MultiChannel CEL files ** ******************************************************************************************************* *******************************************************************************************************/ int isgzGenericMultiChannelCelFile(const char *filename){ gzFile infile; generic_file_header file_header; generic_data_header data_header; if ((infile = gzopen(filename, "rb")) == NULL) {
error("Unable to open the file %s",filename); return 0; } if (!gzread_generic_file_header(&file_header,infile)){ gzclose(infile); return 0; } if (!gzread_generic_data_header(&data_header,infile)){ Free_generic_data_header(&data_header); gzclose(infile); return 0; } if (strcmp(data_header.data_type_id.value, "affymetrix-calvin-multi-intensity") !=0){ Free_generic_data_header(&data_header); gzclose(infile); return 0; } Free_generic_data_header(&data_header); gzclose(infile); return 1; } /* basic idea is to count how many datagroups have a dataset called "Intensity" */ int gzmultichannel_determine_number_channels(const char *filename){ int j=0; int returnvalue = 0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; uint32_t next_group =1; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); do { gzread_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; for (j=0; j < my_data_group.n_data_sets; j++){ gzread_generic_data_set(&my_data_set,infile); if (!compare_AWSTRING_Intensity(my_data_set.data_set_name)){ returnvalue++; break; } gzread_generic_data_set_rows(&my_data_set,infile); Free_generic_data_set(&my_data_set); } Free_generic_data_group(&my_data_group); gzseek(infile,next_group,SEEK_SET); } while (next_group > 0); gzclose(infile); Free_generic_data_header(&my_data_header); return(returnvalue); } char *gzmultichannel_determine_channel_name(const char *filename, int channelindex){ int k=0; char *returnvalue = 0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; uint32_t next_group =1; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } 
gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); while (k < channelindex){ gzread_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; gzseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } gzread_generic_data_group(&my_data_group,infile); if (my_data_group.data_group_name.len > 0){ returnvalue = Calloc(my_data_group.data_group_name.len+1,char); wcstombs(returnvalue, my_data_group.data_group_name.value, my_data_group.data_group_name.len); } Free_generic_data_group(&my_data_group); gzclose(infile); Free_generic_data_header(&my_data_header); return(returnvalue); } int gzread_genericcel_file_intensities_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex){ int i=0, k=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; uint32_t next_group =1; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); /* skip merrily through the file (optimise this with file pointers later) */ while (k < channelindex){ gzread_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; gzseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } /* Now the actual channel of intensities */ gzread_generic_data_group(&my_data_group,infile); gzread_generic_data_set(&my_data_set,infile); gzread_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_group(&my_data_group); gzclose(infile); Free_generic_data_header(&my_data_header); 
return(0); } int gzread_genericcel_file_stddev_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex){ int i=0, k=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; uint32_t next_group =1; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); /* skip merrily through the file (optimise this with file pointers later) */ while (k < channelindex){ gzread_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; gzseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } gzread_generic_data_group(&my_data_group,infile); gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); gzread_generic_data_set(&my_data_set,infile); gzread_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((float *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); return(0); } int gzread_genericcel_file_npixels_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex){ int i=0, k=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; uint32_t next_group =1; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); return 0; } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); /* skip merrily through 
the file (optimise this with file pointers later) */ while (k < channelindex){ gzread_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; gzseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } gzread_generic_data_group(&my_data_group,infile); gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); gzread_generic_data_set(&my_data_set,infile); gzread_generic_data_set_rows(&my_data_set,infile); for (i =0; i < my_data_set.nrows; i++){ intensity[chip_num*my_data_set.nrows + i] = (double)(((short *)my_data_set.Data[0])[i]); } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); return(0); } void gzgeneric_get_masks_outliers_multichannel(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y, int channelindex){ int i=0, k=0; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; uint32_t next_group =1; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); } gzread_generic_file_header(&my_header, infile); gzread_generic_data_header(&my_data_header, infile); /* skip merrily through the file (optimise this with file pointers later) */ while (k < channelindex){ gzread_generic_data_group(&my_data_group,infile); next_group = my_data_group.file_position_nextgroup; gzseek(infile,next_group,SEEK_SET); Free_generic_data_group(&my_data_group); k++; } gzread_generic_data_group(&my_data_group,infile); /* passing the intensities */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, 
SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the stddev */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the npixels */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Outlier" */ gzread_generic_data_set(&my_data_set,infile); *noutliers = my_data_set.nrows; *outliers_x = Calloc(my_data_set.nrows,short); *outliers_y = Calloc(my_data_set.nrows,short); gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ (*outliers_x)[i] = ((short *)my_data_set.Data[0])[i]; (*outliers_y)[i] = ((short *)my_data_set.Data[1])[i]; } gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Mask" */ gzread_generic_data_set(&my_data_set,infile); *nmasks = my_data_set.nrows; *masks_x = Calloc(my_data_set.nrows,short); *masks_y = Calloc(my_data_set.nrows,short); gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ (*outliers_x)[i] = ((short *)my_data_set.Data[0])[i]; (*outliers_y)[i] = ((short *)my_data_set.Data[1])[i]; } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); } void gzgeneric_apply_masks_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers, int channelindex){ int i=0; int cur_index; short cur_x, cur_y; int nrows; int size; gzFile infile; generic_file_header my_header; generic_data_header my_data_header; generic_data_group my_data_group; generic_data_set my_data_set; nvt_triplet *triplet; AffyMIMEtypes cur_mime_type; if ((infile = gzopen(filename, "rb")) == NULL) { error("Unable to open the file %s\n",filename); } gzread_generic_file_header(&my_header, 
infile); gzread_generic_data_header(&my_data_header, infile); gzread_generic_data_group(&my_data_group,infile); triplet = find_nvt(&my_data_header,"affymetrix-cel-rows"); cur_mime_type = determine_MIMETYPE(*triplet); decode_MIME_value(*triplet,cur_mime_type, &nrows, &size); /* passing the intensities */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the stddev */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* passing by the npixels */ gzread_generic_data_set(&my_data_set,infile); gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Outlier" */ gzread_generic_data_set(&my_data_set,infile); if (rm_outliers){ gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ cur_x = ((short *)my_data_set.Data[0])[i]; cur_y = ((short *)my_data_set.Data[1])[i]; cur_index = (int)cur_x + nrows*(int)cur_y; intensity[chip_num*rows + cur_index] = R_NaN; } } gzseek(infile, my_data_set.file_pos_last, SEEK_SET); Free_generic_data_set(&my_data_set); /* Now lets go for the "Mask" */ gzread_generic_data_set(&my_data_set,infile); if (rm_mask){ gzread_generic_data_set_rows(&my_data_set,infile); for (i=0; i < my_data_set.nrows; i++){ cur_x = ((short *)my_data_set.Data[0])[i]; cur_y = ((short *)my_data_set.Data[1])[i]; cur_index = (int)cur_x + nrows*(int)cur_y; intensity[chip_num*rows + cur_index] = R_NaN; } } Free_generic_data_set(&my_data_set); Free_generic_data_header(&my_data_header); Free_generic_data_group(&my_data_group); gzclose(infile); } affyio/src/read_multichannel_celfile_generic.h0000644000175400017540000000430513556116171022662 0ustar00biocbuildbiocbuild#ifndef READ_MULTICHANNEL_CELFILE_GENERIC_H #define READ_MULTICHANNEL_CELFILE_GENERIC_H #include "read_abatch.h" int 
isGenericMultiChannelCelFile(const char *filename); int read_genericcel_file_intensities_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex); int read_genericcel_file_stddev_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex); int read_genericcel_file_npixels_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex); void generic_get_masks_outliers_multichannel(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y, int channelindex); void generic_apply_masks_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers, int channelindex); int multichannel_determine_number_channels(const char *filename); char *multichannel_determine_channel_name(const char *filename, int channelindex); int isgzGenericMultiChannelCelFile(const char *filename); int gzread_genericcel_file_intensities_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex); int gzread_genericcel_file_stddev_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex); int gzread_genericcel_file_npixels_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int channelindex); void gzgeneric_get_masks_outliers_multichannel(const char *filename, int *nmasks, short **masks_x, short **masks_y, int *noutliers, short **outliers_x, short **outliers_y, int channelindex); void gzgeneric_apply_masks_multichannel(const char *filename, double *intensity, int chip_num, int rows, int cols,int chip_dim_rows, int rm_mask, int rm_outliers, int channelindex); int 
/*******************************************************************
 **
 ** Structures for dealing with pgf file information
 **
 ** The three list node types below carry a struct tag as well as a
 ** typedef name. Without the tag, the "next" members referred to
 ** struct types that were never defined, so every link was a pointer
 ** to an incomplete type and had to be cast before it could be
 ** followed. Existing casts to the typedef names remain valid.
 **
 *******************************************************************/

/* Column positions for the probeset level header line.
   integer (from 0 to n-1) indicates position of header
   (-1 means header is not present) */
typedef struct{
  int probeset_id;
  int type;
  int probeset_name;
} header_0;

/* Column positions for the atom level header line.
   integer (from 0 to n-1) indicates position of header
   (-1 means header is not present) */
typedef struct{
  int atom_id;
  int type;
  int exon_position;
} header_1;

/* Column positions for the probe level header line.
   integer (from 0 to n-1) indicates position of header
   (-1 means header is not present) */
typedef struct{
  int probe_id;
  int type;
  int gc_count;
  int probe_length;
  int interrogation_position;
  int probe_sequence;
} header_2;

/* Everything read from the "#%key=value" header section of a PGF file */
typedef struct{
  char **chip_type;            /* one entry per chip_type header line */
  int n_chip_type;
  char *lib_set_name;
  char *lib_set_version;
  char *pgf_format_version;
  char *header0_str;           /* raw header0 value and its decoded column order */
  header_0 *header0;
  char *header1_str;
  header_1 *header1;
  char *header2_str;
  header_2 *header2;
  char *create_date;
  char *guid;
  char **other_headers_keys;   /* unrecognised headers, kept as parallel arrays */
  char **other_headers_values;
  int n_other_headers;
} pgf_headers;

/* Data stored at the probe level: one node per probe, singly linked */
typedef struct probe_list_node{
  int probe_id;
  char *type;
  int gc_count;
  int probe_length;
  int interrogation_position;
  char *probe_sequence;
  struct probe_list_node *next;
} probe_list_node;

typedef struct{
  int n_probes;
  probe_list_node *first;
} probe_list_header;

/* Data stored at the atom level: each atom owns a list of probes */
typedef struct atom_list_node{
  int atom_id;
  char *type;
  char *exon_position;
  probe_list_header *probes;
  struct atom_list_node *next;
} atom_list_node;

typedef struct{
  int n_atoms;
  atom_list_node *first;
} atom_list_header;

/* Data stored at the probeset level: each probeset owns a list of atoms */
typedef struct probeset_list_node *node_pointer;

typedef struct probeset_list_node{
  int probeset_id;
  char *type;
  char *probeset_name;
  atom_list_header *atoms;
  struct probeset_list_node *next;
} probeset_list_node;

typedef struct{
  int n_probesets;
  probeset_list_node *first;
  probeset_list_node *current;
  probeset_list_node *last;
} probeset_list_header;

/* Structure for storing a pgf file (after it is read from file) */
typedef struct{
  pgf_headers *headers;
  probeset_list_header *probesets;
} pgf_file;
** ** **************************************************************/ typedef struct{ char **tokens; int n; } tokenset; /****************************************************************** ** ** tokenset *tokenize(char *str, char *delimiters) ** ** char *str - a string to break into tokens ** char *delimiters - delimiters to use in breaking up the line ** ** ** RETURNS a new tokenset ** ** Given a string, split into tokens based on a set of delimitors ** *****************************************************************/ static tokenset *tokenize(char *str, char *delimiters){ #if USE_PTHREADS char *tmp_pointer; #endif int i=0; char *current_token; tokenset *my_tokenset = Calloc(1,tokenset); my_tokenset->n=0; my_tokenset->tokens = NULL; #if USE_PTHREADS current_token = strtok_r(str,delimiters,&tmp_pointer); #else current_token = strtok(str,delimiters); #endif while (current_token != NULL){ my_tokenset->n++; my_tokenset->tokens = Realloc(my_tokenset->tokens,my_tokenset->n,char*); my_tokenset->tokens[i] = Calloc(strlen(current_token)+1,char); strcpy(my_tokenset->tokens[i],current_token); my_tokenset->tokens[i][(strlen(current_token))] = '\0'; i++; #if USE_PTHREADS current_token = strtok_r(NULL,delimiters,&tmp_pointer); #else current_token = strtok(NULL,delimiters); #endif } return my_tokenset; } /****************************************************************** ** ** int tokenset_size(tokenset *x) ** ** tokenset *x - a tokenset ** ** RETURNS the number of tokens in the tokenset ** ******************************************************************/ static int tokenset_size(tokenset *x){ return x->n; } /****************************************************************** ** ** char *get_token(tokenset *x, int i) ** ** tokenset *x - a tokenset ** int i - index of the token to return ** ** RETURNS pointer to the i'th token ** ******************************************************************/ static char *get_token(tokenset *x,int i){ return x->tokens[i]; } 
/******************************************************************
 **
 ** void delete_tokens(tokenset *x)
 **
 ** tokenset *x - a tokenset
 **
 ** Releases every token string, the token array and the tokenset
 ** itself.
 **
 ******************************************************************/

static void delete_tokens(tokenset *x){

  int idx = 0;

  while (idx < x->n){
    Free(x->tokens[idx]);
    idx++;
  }
  Free(x->tokens);
  Free(x);
}


/*******************************************************************
 **
 ** int token_ends_with(char *token, char *ends_in)
 **
 ** char *token   - string to examine
 ** char *ends_in - suffix being looked for
 **
 ** RETURNS the index in token at which the suffix starts, or 0 when
 ** token does not end with ends_in.
 **
 ** token has to be strictly longer than ends_in, so a token equal to
 ** the suffix yields 0:
 **
 **   token = "TestStr", ends_in = "TestStr"  gives 0
 **   token = "TestStr", ends_in = "estStr"   gives 1
 **   token = "TestStr", ends_in = "stStr"    gives 2
 **
 ******************************************************************/

static int token_ends_with(char *token, char *ends_in){

  int token_len = strlen(token);
  int suffix_len = strlen(ends_in);
  int offset = token_len - suffix_len;

  /* token is not longer than the suffix, so it cannot end with it */
  if (offset <= 0){
    return 0;
  }

  return (strcmp(token + offset, ends_in) == 0) ? offset : 0;
}
/****************************************************************
 **
 ** int ReadFileLine(char *buffer, int buffersize, FILE *currentFile)
 **
 ** char *buffer      - place to store contents of the line
 ** int buffersize    - size of the buffer
 ** FILE *currentFile - FILE pointer to an opened file
 **
 ** Reads one line from the file into the buffer using fgets().
 **
 ** RETURNS 1 if something was read, 0 if fgets() returned NULL
 ** (end of file or read error). No error is raised here.
 **
 ***************************************************************/

static int ReadFileLine(char *buffer, int buffersize, FILE *currentFile){
  return (fgets(buffer, buffersize, currentFile) != NULL) ? 1 : 0;
}


/****************************************************************
 **
 ** Code for identifying what type of information is stored in
 ** the current line
 **
 ***************************************************************/

/****************************************************************
 **
 ** static int IsHeaderLine(char *buffer)
 **
 ** char *buffer - contains line to evaluate
 **
 ** RETURNS 1 if the line starts with "#%", 0 otherwise
 **
 ***************************************************************/

static int IsHeaderLine(char *buffer){
  return (buffer[0] == '#' && buffer[1] == '%') ? 1 : 0;
}


/****************************************************************
 **
 ** static int IsCommentLine(char *buffer)
 **
 ** char *buffer - contains line to evaluate
 **
 ** RETURNS 1 if the line starts with "#", 0 otherwise. Header
 ** lines ("#%") therefore also count as comment lines.
 **
 ***************************************************************/

static int IsCommentLine(char *buffer){
  return (buffer[0] == '#') ? 1 : 0;
}


/*****************************************************************
 **
 ** static int IsLevel2(char *buffer)
 **
 ** char *buffer - contains line to evaluate
 **
 ** RETURNS 1 if the line begins with two tab characters, 0 otherwise
 **
 ***************************************************************/

static int IsLevel2(char *buffer){
  return (buffer[0] == '\t' && buffer[1] == '\t') ? 1 : 0;
}
/***************************************************************** ** ** static int IsLevel1(char *buffer) ** ** char *buffer - contains line to evaluate ** ** checks whether supplied line begins with a single tab characters it \t ** ** Return 1 if true, 0 otherwise ** ***************************************************************/ static int IsLevel1(char *buffer){ if (strncmp("\t",buffer,1) == 0){ if (strncmp("\t\t",buffer,2) != 0){ return 1; } return 0; } return 0; } /**************************************************************** **************************************************************** ** ** Code for deallocating or initializing header data structures ** **************************************************************** ****************************************************************/ void dealloc_pgf_headers(pgf_headers *header){ int i; if (header->n_chip_type > 0){ for (i = 0; i < header->n_chip_type; i++){ Free(header->chip_type[i]); } Free(header->chip_type); } if (header->lib_set_name != NULL){ Free(header->lib_set_name); } if (header->lib_set_version != NULL){ Free(header->lib_set_version); } if (header->pgf_format_version != NULL){ Free(header->pgf_format_version); } if (header->header0_str != NULL){ Free(header->header0_str); Free(header->header0); } if (header->header1_str != NULL){ Free(header->header1_str); Free(header->header1); } if (header->header2_str != NULL){ Free(header->header2_str); Free(header->header2); } if (header->create_date != NULL){ Free(header->create_date); } if (header->guid != NULL){ Free(header->guid); } if (header->n_other_headers > 0){ for (i = 0; i < header->n_other_headers; i++){ Free(header->other_headers_keys[i]); Free(header->other_headers_values[i]); } Free(header->other_headers_keys); Free(header->other_headers_values); } } void dealloc_probes(probe_list_header *probes){ probe_list_node *temp_node; if (probes->first != NULL){ temp_node = probes->first; while (temp_node != NULL){ probes->first = (probe_list_node 
*)temp_node->next; if (temp_node->type != NULL){ Free(temp_node->type); } if (temp_node->probe_sequence != NULL){ Free(temp_node->probe_sequence); } Free(temp_node); temp_node = probes->first; } } } void dealloc_atoms(atom_list_header *atoms){ atom_list_node *temp_node; if (atoms->first != NULL){ temp_node = atoms->first; while (temp_node != NULL){ atoms->first = (atom_list_node *)temp_node->next; if (temp_node->type != NULL){ Free(temp_node->type); } if (temp_node->exon_position != NULL){ Free(temp_node->exon_position); } if (temp_node->probes != NULL){ dealloc_probes(temp_node->probes); Free(temp_node->probes); } Free(temp_node); temp_node = atoms->first; } } } void dealloc_pgf_probesets(probeset_list_header *probesets){ probeset_list_node *temp_node; if (probesets->first != NULL){ temp_node = probesets->first; while (temp_node != NULL){ probesets->first = (probeset_list_node *)temp_node->next; if (temp_node->type != NULL){ Free(temp_node->type); } if (temp_node->probeset_name != NULL){ Free(temp_node->probeset_name); } if (temp_node->atoms != NULL){ dealloc_atoms(temp_node->atoms); Free(temp_node->atoms); } Free(temp_node); temp_node = probesets->first; } } } void dealloc_pgf_file(pgf_file* my_pgf){ if (my_pgf->headers != NULL){ dealloc_pgf_headers(my_pgf->headers); Free(my_pgf->headers); } if (my_pgf->probesets !=NULL){ dealloc_pgf_probesets(my_pgf->probesets); Free(my_pgf->probesets); } } void initialize_pgf_header(pgf_headers *header){ header->chip_type = NULL; header->n_chip_type = 0; header->lib_set_name= NULL; header->lib_set_version= NULL; header->pgf_format_version= NULL; header->header0_str= NULL; header->header0= NULL; header->header1_str= NULL; header->header1= NULL; header->header2_str= NULL; header->header2= NULL; header->create_date= NULL; header->guid= NULL; header->other_headers_keys= NULL; header->other_headers_values= NULL; header->n_other_headers=0; } /**************************************************************** 
**************************************************************** ** ** Code for figuring out column ordering ** **************************************************************** ***************************************************************/ static void determine_order_header0(char *header_str, header_0 *header0){ tokenset *cur_tokenset; int i; char *temp_str = Calloc(strlen(header_str) +1, char); strcpy(temp_str,header_str); header0->probeset_id = -1; header0->type = -1; header0->probeset_name = -1; cur_tokenset = tokenize(temp_str,"\t\r\n"); for (i=0; i < tokenset_size(cur_tokenset); i++){ if (strcmp(get_token(cur_tokenset,i),"probeset_id")==0){ header0->probeset_id = i; } else if (strcmp(get_token(cur_tokenset,i),"type")==0){ header0->type = i; } else if (strcmp(get_token(cur_tokenset,i),"type")==0){ header0->probeset_name = i; } } delete_tokens(cur_tokenset); Free(temp_str); } static void determine_order_header1(char *header_str, header_1 *header1){ tokenset *cur_tokenset; int i; char *temp_str = Calloc(strlen(header_str) +1, char); strcpy(temp_str,header_str); header1->atom_id = -1; header1->type = -1; header1->exon_position = -1; cur_tokenset = tokenize(temp_str,"\t\r\n"); for (i=0; i < tokenset_size(cur_tokenset); i++){ if (strcmp(get_token(cur_tokenset,i),"atom_id")==0){ header1->atom_id = i; } else if (strcmp(get_token(cur_tokenset,i),"type")==0){ header1->type = i; } else if (strcmp(get_token(cur_tokenset,i),"exon_position")==0){ header1->exon_position = i; } } delete_tokens(cur_tokenset); Free(temp_str); } static void determine_order_header2(char *header_str, header_2 *header2){ tokenset *cur_tokenset; int i; char *temp_str = Calloc(strlen(header_str) +1, char); strcpy(temp_str,header_str); header2->probe_id = -1; header2->type = -1; header2->gc_count = -1; header2->probe_length = -1; header2->interrogation_position = -1; header2->probe_sequence = -1; cur_tokenset = tokenize(temp_str,"\t\r\n"); for (i=0; i < tokenset_size(cur_tokenset); i++){ if 
(strcmp(get_token(cur_tokenset,i),"probe_id")==0){ header2->probe_id = i; } else if (strcmp(get_token(cur_tokenset,i),"type")==0){ header2->type = i; } else if (strcmp(get_token(cur_tokenset,i),"gc_count")==0){ header2->gc_count = i; } else if (strcmp(get_token(cur_tokenset,i),"probe_length")==0){ header2->probe_length = i; } else if (strcmp(get_token(cur_tokenset,i),"interrogation_position")==0){ header2->interrogation_position = i; } else if (strcmp(get_token(cur_tokenset,i),"probe_sequence")==0){ header2->probe_sequence = i; } } delete_tokens(cur_tokenset); Free(temp_str); } /**************************************************************** ** ** Validate that required headers are present in file. ** ** Return 0 if an expected header is not present. ** Returns 1 otherwise (ie everything looks fine) ** ***************************************************************/ static int validate_pgf_header(pgf_headers *header){ /* check that required headers are all there (have been read) */ if (header->chip_type == NULL) return 0; if (header->lib_set_name == NULL) return 0; if (header->lib_set_version == NULL) return 0; if (header->pgf_format_version == NULL) return 0; if (header->header0_str == NULL) return 0; if (header->header1_str == NULL) return 0; if (header->header2_str == NULL) return 0; /* Check that format version is 1.0 (only supported version) */ if (strcmp( header->pgf_format_version,"1.0") != 0){ return 0; } /* check that header0, header1, header2 (ie the three levels of headers) have required fields */ if (header->header0->probeset_id == -1) return 0; if (header->header1->atom_id == -1) return 0; if (header->header2->probe_id == -1) return 0; if (header->header2->type == -1) return 0; return 1; } /**************************************************************** **************************************************************** ** ** Code for actually reading from the file ** **************************************************************** 
***************************************************************/ static FILE *open_pgf_file(const char *filename){ const char *mode = "r"; FILE *currentFile = NULL; currentFile = fopen(filename,mode); if (currentFile == NULL){ error("Could not open file %s", filename); } return currentFile; } /**************************************************************** ** ** Reading the header ** ***************************************************************/ void read_pgf_header(FILE *cur_file, char *buffer, pgf_headers *header){ tokenset *cur_tokenset; char *temp_str; initialize_pgf_header(header); do { ReadFileLine(buffer, 1024, cur_file); /* Rprintf("%s\n",buffer); */ if (IsHeaderLine(buffer)){ cur_tokenset = tokenize(&buffer[2],"=\r\n"); /* hopefully token 0 is Key and token 1 is Value */ /* Rprintf("Key is: %s\n",get_token(cur_tokenset,0)); Rprintf("Value is: %s\n",get_token(cur_tokenset,1)); */ /* Decode the Key/Value pair */ if (strcmp(get_token(cur_tokenset,0),"chip_type") == 0){ if (header->n_chip_type == 0){ header->chip_type = Calloc(1, char *); } else { header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *); } temp_str = Calloc(strlen(get_token(cur_tokenset,1))+1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->chip_type[header->n_chip_type] = temp_str; header->n_chip_type++; } else if (strcmp(get_token(cur_tokenset,0), "lib_set_name") == 0){ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->lib_set_name = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "lib_set_version") == 0){ temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->lib_set_version = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "pgf_format_version") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->pgf_format_version = temp_str; } else if 
(strcmp(get_token(cur_tokenset,0), "header0") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->header0_str = temp_str; header->header0 = Calloc(1,header_0); determine_order_header0(header->header0_str,header->header0); } else if (strcmp(get_token(cur_tokenset,0), "header1") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->header1_str = temp_str; header->header1 = Calloc(1,header_1); determine_order_header1(header->header1_str,header->header1); } else if (strcmp(get_token(cur_tokenset,0), "header2") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->header2_str = temp_str; header->header2 = Calloc(1,header_2); determine_order_header2(header->header2_str,header->header2); } else if (strcmp(get_token(cur_tokenset,0), "create_date") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->create_date = temp_str; } else if (strcmp(get_token(cur_tokenset,0), "guid") == 0) { temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->guid = temp_str; } else { /* not one of the recognised header types */ if ( header->n_other_headers == 0){ header->other_headers_keys = Calloc(1, char *); header->other_headers_values = Calloc(1, char *); } else { header->other_headers_keys = Realloc(header->other_headers_keys,header->n_other_headers+1, char *); header->other_headers_values = Realloc(header->other_headers_values,header->n_other_headers+1, char *); header->chip_type = Realloc(header->chip_type, header->n_chip_type+1, char *); } temp_str = Calloc(strlen(get_token(cur_tokenset,1)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,1)); header->other_headers_values[header->n_other_headers] = temp_str; temp_str = 
Calloc(strlen(get_token(cur_tokenset,0)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,0)); header->other_headers_keys[header->n_other_headers] = temp_str; header->n_other_headers++; } delete_tokens(cur_tokenset); } } while (IsHeaderLine(buffer)); } /**************************************************************** ** ** Reading the probesets/body of the file ** ***************************************************************/ void initialize_probeset_list(probeset_list_header *probeset_list){ probeset_list->n_probesets = 0; probeset_list->first = NULL; probeset_list->current = NULL; probeset_list->last = NULL; } void insert_probe(char *buffer, probe_list_header *probe_list, header_2 *header2){ char *temp_str; tokenset *cur_tokenset; probe_list_node *temp_ptr; probe_list_node *temp_node = Calloc(1,probe_list_node); cur_tokenset = tokenize(buffer,"\t\r\n"); temp_node->probe_id = atoi(get_token(cur_tokenset,header2->probe_id)); if (header2->type != -1){ temp_str = Calloc(strlen(get_token(cur_tokenset,header2->type)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,header2->type)); temp_node->type = temp_str; } if (header2->gc_count != -1){ temp_node->gc_count = atoi(get_token(cur_tokenset,header2->gc_count)); } if (header2->probe_length != -1){ temp_node->probe_length = atoi(get_token(cur_tokenset,header2->probe_length)); } if (header2->interrogation_position != -1){ temp_node->interrogation_position = atoi(get_token(cur_tokenset,header2->interrogation_position)); } if (header2->probe_sequence != -1){ temp_str = Calloc(strlen(get_token(cur_tokenset,header2->probe_sequence)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,header2->probe_sequence)); temp_node->probe_sequence = temp_str; } temp_node->next = NULL; if (probe_list->n_probes == 0){ probe_list->first = temp_node; probe_list->n_probes = 1; } else { /* figure out where the end of the list is. 
Insert there */ temp_ptr = probe_list->first; while (temp_ptr->next != NULL){ temp_ptr = (probe_list_node*)temp_ptr->next; } temp_ptr->next = (struct probe_list_node*)temp_node; probe_list->n_probes++; } delete_tokens(cur_tokenset); } void insert_level2(char *buffer, probeset_list_header *probeset_list, header_2 *header2){ atom_list_header *current_level1; atom_list_node *current_atom; if (probeset_list->current == NULL){ /* Oh Boy, this is a problem no header0 level object to insert into. */ error("Can not read a level 2 line before seeing a level 0 line. File corrupted?"); } if (probeset_list->current->atoms == NULL){ /* Oh Boy, this is a problem no header1 level object to insert into. */ error("Can not read a level 2 line before seeing a level 1 line. File corrupted?"); } current_level1 = probeset_list->current->atoms; current_atom = current_level1->first; while (current_atom->next != NULL){ current_atom = (atom_list_node *)current_atom->next; } if (current_atom->probes == NULL){ current_atom->probes = Calloc(1,probe_list_header); } insert_probe(buffer, current_atom->probes, header2); } void insert_atom(char *buffer, atom_list_header *atoms_list, header_1 *header1){ char *temp_str; tokenset *cur_tokenset; atom_list_node *temp_ptr; atom_list_node *temp_node = Calloc(1,atom_list_node); cur_tokenset = tokenize(buffer,"\t\r\n"); temp_node->atom_id = atoi(get_token(cur_tokenset,header1->atom_id)); if (header1->type != -1){ temp_str = Calloc(strlen(get_token(cur_tokenset,header1->type)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,header1->type)); temp_node->type = temp_str; } if (header1->exon_position != -1){ temp_str = Calloc(strlen(get_token(cur_tokenset,header1->exon_position)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,header1->exon_position)); temp_node->exon_position = temp_str; } temp_node->probes = NULL; temp_node->next = NULL; if (atoms_list->n_atoms == 0){ atoms_list->first = temp_node; atoms_list->n_atoms = 1; } else { /* figure out where the 
end of the list is. Insert there */ temp_ptr = (atom_list_node*)atoms_list->first; while (temp_ptr->next != NULL){ temp_ptr= (atom_list_node*)temp_ptr->next; } temp_ptr->next = (struct atom_list_node*)temp_node; atoms_list->n_atoms++; } delete_tokens(cur_tokenset); } void insert_level1(char *buffer, probeset_list_header *probeset_list, header_1 *header1){ probeset_list_node *current_level0; if (probeset_list->current == NULL){ /* Oh Boy, this is a problem no header0 level object to insert into. */ error("Can not read a level 1 line before seeing a level 0 line. File corrupted?"); } current_level0 = probeset_list->current; if (current_level0->atoms == NULL){ current_level0->atoms = Calloc(1,atom_list_header); } /* Now lets insert the data */ insert_atom(buffer, current_level0->atoms, header1); } void insert_level0(char *buffer, probeset_list_header *probeset_list, header_0 *header0){ char *temp_str; tokenset *cur_tokenset; probeset_list_node *temp_node = Calloc(1,probeset_list_node); cur_tokenset = tokenize(buffer,"\t\r\n"); temp_node->probeset_id = atoi(get_token(cur_tokenset,header0->probeset_id)); if (header0->type != -1){ temp_str = Calloc(strlen(get_token(cur_tokenset,header0->type)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,header0->type)); temp_node->type = temp_str; } if (header0->probeset_name != -1){ temp_str = Calloc(strlen(get_token(cur_tokenset,header0->probeset_name)) + 1,char); strcpy(temp_str,get_token(cur_tokenset,header0->probeset_name)); temp_node->probeset_name = temp_str; } temp_node->atoms = NULL; temp_node->next = NULL; if (probeset_list->first == NULL){ probeset_list->first = temp_node; probeset_list->current = temp_node; probeset_list->last = temp_node; probeset_list->n_probesets = 1; } else { probeset_list->last->next = (struct probeset_list_node *)temp_node; probeset_list->last = temp_node; probeset_list->current = temp_node; probeset_list->n_probesets++; } delete_tokens(cur_tokenset); } void read_pgf_probesets(FILE *cur_file, char 
*buffer, probeset_list_header *probeset_list, pgf_headers *header){ initialize_probeset_list(probeset_list); insert_level0(buffer, probeset_list, header->header0); while(ReadFileLine(buffer, 1024, cur_file)){ if (IsLevel2(buffer)){ insert_level2(buffer, probeset_list, header->header2); } else if (IsLevel1(buffer)){ insert_level1(buffer, probeset_list, header->header1); } else if (IsCommentLine(buffer)){ /*Ignore */ } else { insert_level0(buffer, probeset_list, header->header0); } } } /**************************************************************** **************************************************************** ** ** Funtionality for counting probeset types ** **************************************************************** ****************************************************************/ typedef struct{ char *type; int count; } probeset_type_list; probeset_type_list *pgf_count_probeset_types(pgf_file *my_pgf, int *number){ probeset_type_list *my_type_list = Calloc(1,probeset_type_list); char *cur_type; int n; /* traverse the probesets. 
each time examining the probeset type */ if (my_pgf->probesets != NULL){ if (my_pgf->probesets->first != NULL){ my_pgf->probesets->current = my_pgf->probesets->first; if (my_pgf->probesets->current->type == NULL){ my_type_list[0].type = Calloc(5,char); strcpy(my_type_list[0].type,"none"); } else { my_type_list[0].type = Calloc(strlen(my_pgf->probesets->current->type) + 1,char); strcpy(my_type_list[0].type,my_pgf->probesets->current->type); } my_type_list[0].count = 1; *number = 1; /* number of different types seen */ while (my_pgf->probesets->current->next != NULL){ my_pgf->probesets->current= (probeset_list_node *)my_pgf->probesets->current->next; if (my_pgf->probesets->current->type == NULL){ cur_type = "none"; } else { cur_type = my_pgf->probesets->current->type; } n = 0; while (n < *number){ if (strcmp(cur_type,my_type_list[n].type) == 0){ break; } n++; } if (n == *number){ my_type_list = Realloc(my_type_list,(n+1),probeset_type_list); my_type_list[n].type = Calloc(strlen(cur_type) + 1,char); strcpy(my_type_list[n].type,cur_type); my_type_list[n].count = 1; *number = *number + 1; } else { my_type_list[n].count++; } } } } return my_type_list; } void dealloc_probeset_type_list(probeset_type_list *my_type_list, int length){ int i; for (i = 0; i < length; i++){ Free(my_type_list[i].type); } Free(my_type_list); } /**************************************************************** **************************************************************** ** ** Functionality for testing the parsers (from R .C interface) ** **************************************************************** ****************************************************************/ void read_pgf_file(char **filename){ FILE *cur_file; pgf_file my_pgf; char *buffer = Calloc(1024, char); probeset_type_list *my_probeset_types; int ntypes; cur_file = open_pgf_file(filename[0]); my_pgf.headers = Calloc(1, pgf_headers); my_pgf.probesets = Calloc(1, probeset_list_header); 
read_pgf_header(cur_file,buffer,my_pgf.headers); if (validate_pgf_header(my_pgf.headers)){ read_pgf_probesets(cur_file, buffer, my_pgf.probesets, my_pgf.headers); my_probeset_types = pgf_count_probeset_types(&my_pgf, &ntypes); dealloc_probeset_type_list(my_probeset_types, ntypes); } Free(buffer); dealloc_pgf_file(&my_pgf); fclose(cur_file); }