affxparser/.Rinstignore0000644000175200017520000000040614516003651016242 0ustar00biocbuildbiocbuilddoc/Makefile$ # Certain LaTeX files (e.g. bib, bst, sty) must be part of the build # such that they are available for R CMD check. These are excluded # from the install using .Rinstignore in the top-level directory # such as this one. doc/.*[.](bib|bst|sty)$ affxparser/DESCRIPTION0000644000175200017520000000333314516022540015444 0ustar00biocbuildbiocbuildPackage: affxparser Version: 1.74.0 Depends: R (>= 2.14.0) Suggests: R.oo (>= 1.22.0), R.utils (>= 2.7.0), AffymetrixDataTestFiles Title: Affymetrix File Parsing SDK Authors@R: c( person("Henrik", "Bengtsson", role=c("aut")), person("James", "Bullard", role=c("aut")), person("Robert", "Gentleman", role=c("ctb")), person("Kasper Daniel", "Hansen", role=c("aut", "cre"), email="kasperdanielhansen@gmail.com"), person("Jim", "Hester", role=c("ctb")), person("Martin", "Morgan", role=c("ctb"))) Author: Henrik Bengtsson [aut], James Bullard [aut], Robert Gentleman [ctb], Kasper Daniel Hansen [aut, cre], Jim Hester [ctb], Martin Morgan [ctb] Maintainer: Kasper Daniel Hansen Description: Package for parsing Affymetrix files (CDF, CEL, CHP, BPMAP, BAR). It provides methods for fast and memory efficient parsing of Affymetrix files using the Affymetrix' Fusion SDK. Both ASCII- and binary-based files are supported. Currently, there are methods for reading chip definition file (CDF) and a cell intensity file (CEL). These files can be read either in full or in part. For example, probe signals from a few probesets can be extracted very quickly from a set of CEL files into a convenient list structure. 
Note: Fusion SDK v1.1.2 License: LGPL (>= 2) LazyLoad: yes URL: https://github.com/HenrikBengtsson/affxparser BugReports: https://github.com/HenrikBengtsson/affxparser/issues biocViews: Infrastructure, DataImport, Microarray, ProprietaryPlatforms, OneChannel git_url: https://git.bioconductor.org/packages/affxparser git_branch: RELEASE_3_18 git_last_commit: cada520 git_last_commit_date: 2023-10-24 Date/Publication: 2023-10-24 NeedsCompilation: yes Packaged: 2023-10-24 20:13:20 UTC; biocbuild affxparser/NAMESPACE0000644000175200017520000000030214516003651015150 0ustar00biocbuildbiocbuild## ## NAMESPACE file for the affxparser package. ## useDynLib(affxparser, .registration = TRUE) importFrom("stats", "df") importFrom("utils", "packageDescription") exportPattern("^([^R\\.])") affxparser/NEWS.md0000644000175200017520000012336214516003651015043 0ustar00biocbuildbiocbuild# Version 1.73.0 [2023-04-25] ## Notes * The version number was bumped for the Bioconductor develop version, which is now Bioconductor 3.18 for R (>= 4.4.0). # Version 1.72.0 [2023-04-25] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 3.17 for R (>= 4.3.0). # Version 1.71.2 [2023-04-23] ### Bug Fixes * Fix to `src/_mingw.h` provided by Tomas Kalibera. # Version 1.71.1 [2023-04-04] ### Bug Fixes * Fix two instances of "catching polymorphic type 'class Except' by value [-Wcatch-value=]" compiler warnings. # Version 1.71.0 [2022-11-01] ## Notes * The version number was bumped for the Bioconductor devel version, which is now Bioconductor 3.17 for R devel. # Version 1.70.0 [2022-11-01] ## Miscellaneous * Remove extra backslash escaping in a few help pages. ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 3.16 for R (>= 4.2.2). # Version 1.69.1 [2022-04-28] ## Bug Fixes * Ported bug fix from **affxparser** 1.68.1.
# Version 1.69.0 [2022-04-26] # Version 1.68.1 [2022-04-28] ## Bug Fixes * **affxparser** (>= 1.67.1) failed to install with R built with the `-fpic` flag. The symptom was a linking error `ld: 000.init.o: relocation R_X86_64_32 against '.rodata' can not be used when making a shared object; recompile with -fPIC collect2: error: ld returned 1 exit status`. # Version 1.68.0 [2022-04-26] # Version 1.67.1 [2022-03-23] ## Significant Changes * This package requires R (>= 4.0.0) when built on MS Windows. This is due to the added support for UCRT on MS Windows, which is required for the upcoming R 4.2.0. ## Software Quality * Updates to build package from source on MS Windows with UCRT. Thanks to Tomas Kalibera for the contribution. * Now registering native routines - apparently never happened before. # Version 1.67.0 [2021-10-27] # Version 1.66.0 [2021-10-27] # Version 1.65.3 [2021-09-22] ## Software Quality * Making sure all pathnames are of length 100 or shorter. # Version 1.65.2 [2021-09-22] ## Software Quality * Now properly registering native routines. # Version 1.65.1 [2021-09-09] ## Bug Fixes * The package did not install on macOS with the M1 chip with error: `use of undeclared identifier 'finite'; did you mean 'isfinite'?`. This issue goes back to 2014, when macOS produced `warning: 'finite' is deprecated: first deprecated in OS X 10.9 [-Wdeprecated-declarations]. isOk = finite(x);`. Patched by using `isfinite()` instead of `finite()`. # Version 1.64.0 [2021-05-19] # Version 1.62.0 [2020-10-27] # Version 1.60.0 [2020-04-27] # Version 1.58.0 [2019-10-29] # Version 1.56.0 [2019-05-02] # Version 1.55.0 [2018-10-30] # Version 1.54.0 [2018-10-30] # Version 1.53.2 [2018-10-22] ## Documentation * Link to Affx Fusion SDK archive on GitHub. * Spelling corrections.
# Version 1.53.1 [2018-08-28] # Version 1.53.0 [2018-04-30] # Version 1.52.0 [2018-04-30] # Version 1.51.0 [2017-10-30] # Version 1.50.0 [2017-10-30] # Version 1.49.0 [2017-04-23] # Version 1.48.0 [2017-04-23] # Version 1.47.0 [2016-10-18] ## Notes * The version number was bumped for the Bioconductor devel version, which is now Bioconductor 3.5 for R (>= 3.4.0). # Version 1.46.0 [2016-10-18] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 3.4 for R (>= 3.3.1). # Version 1.45.1 [2016-09-16] ## Code Refactoring * Using `c(x,y)` instead of `append(x,y)` internally. * CLEANUP: Dropped obsolete `src/R_affx_test.*cmdline.cpp` files. # Version 1.45.0 [2015-05-03] ## Notes * The version number was bumped for the Bioconductor devel version, which is now Bioconductor 3.4 for R (>= 3.3.0). # Version 1.44.0 [2015-05-03] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 3.3 for R (>= 3.3.0). # Version 1.43.2 [2016-04-05] ## New Features * WINDOWS: Package now compiles with both the old gcc-4.6.3 toolchain as well as the new gcc-4.9.3 toolchain - introduced in R (>= 3.3.0). Thanks to Jim Hester and Dan Tenenbaum for help with this. # Version 1.43.1 [2016-02-28] ## New Features * The DLL is now unloaded when the package is unloaded. ## Bug Fixes * Fixed a bug related to including `` and extern C, reported by Brian Ripley. # Version 1.43.0 [2015-10-23] ## Notes * The version number was bumped for the Bioconductor devel version, which is now Bioconductor 3.3 for R (>= 3.3.0). # Version 1.42.0 [2015-10-13] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 3.2 for R (>= 3.2.2). # Version 1.41.7 [2015-09-14] ## Code Refactoring * ROBUSTNESS: Explicitly importing core R functions. # Version 1.41.6 [2015-07-29] ## Notes * Updated the `BiocViews` field of DESCRIPTION. 
# Version 1.41.5 [2015-06-17] ## Notes * New maintainer address (in all fields). # Version 1.41.4 [2015-05-26] ## Notes * New maintainer address. # Version 1.41.3 [2015-05-13] ## Significant Changes * AVAILABILITY: Removed requirement for 'GNU make'. # Version 1.41.2 [2015-05-05] ## Bug Fixes * `readCelHeader()` and `readCel()` would core dump R/affxparser if trying to read multi-channel CEL files (Issue #16). Now an error is generated instead. Multi-channel CEL files (e.g. Axiom) are not supported by **affxparser**. Thanks to Kevin McLoughlin (Lawrence Livermore National Laboratory, USA) for reporting on this. * `readCelHeader()` and `readCel()` on corrupt CEL files could core dump R/affxparser (Issues #13 & #15). Now an error is generated instead. Thanks to Benilton Carvalho (Universidade Estadual de Campinas, Sao Paulo, Brazil) and Malte Bismarck (Martin Luther University of Halle-Wittenberg) for reports. # Version 1.41.1 [2015-04-25] ## Bug Fixes * Native functions `R_affx_GetCHPEntries()` and `R_affx_ReadCHP()` had unbalanced `PROTECT()`/`UNPROTECT()`. Also, native `R_affx_GetCHPGenotypingResults()` had two non-`PROTECT()`:ed usages of `mkString()`. Thanks to Tomas Kalibera at Northeastern University for reporting on this. # Version 1.41.0 [2015-04-16] ## Notes * The version number was bumped for the Bioconductor devel version, which is now Bioconductor 3.2 for R (>= 3.3.0). # Version 1.40.0 [2015-04-16] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 3.1 for R (>= 3.2.0). # Version 1.39.5 [2015-04-15] ## Significant Changes * Removed `SystemRequirements: GNU make`. ## New Features * ROBUSTNESS: Now `readPgfEnv()`/`readPgf()` validates `indices`, iff possible. * Now `readPgfEnv()`/`readPgf()` coerces some header fields to integers, iff they exist, specifically `num-cols`, `num-rows`, `probesets`, and `datalines`.
* CLEANUP: Package no longer gives `readBin()` warnings on `'signed = FALSE' is only valid for integers of sizes 1 and 2`. ## Bug Fixes * `convertCel()` on a CCG/v1 CEL file could give `Error in sprintf("GridCorner%s=%d %d\n" ... invalid format '%d' ...)`. Added package test for `convertCel()`, but in this particular case it would not have caught it because it only happened for chip types of particular dimensions. Thanks to Malte Bismarck at UK Halle (Germany) for reporting on this. # Version 1.39.4 [2015-01-18] ## Significant Changes * ROBUSTNESS: 'GNU make' is a `SystemRequirements` (for now). ## Code Refactoring * ROBUSTNESS: Did not seem to be needed, but package is now a good citizen and does `library.dynam.unload()` when unloaded. * Now using `requireNamespace()` instead of `require()`. * Internal cleanup of native code. # Version 1.39.3 [2014-11-26] ## Bug Fixes * `readPgf()` and `readPgfEnv()` failed to read all units (probesets) on some systems. Extensive package tests have been added to test this and other cases. Thanks to Grischa Toedt at EMBL Germany for reporting on, troubleshooting, and helping out with patches for this bug. # Version 1.39.2 [2014-10-28] ## Bug Fixes * The range test of argument `units` to `readCdf()` and `readCdfQc()` was never performed due to a typo, meaning it was possible to request units out of range. Depending on system this could result in either a core dump or random garbage read for the out of range units. ## Software Quality * ROBUSTNESS: Added package system tests for out of range `units` and `indices` arguments for most read functions. # Version 1.39.1 [2014-10-26] ## New Features * ROBUSTNESS: Now all methods give an informative error message if zero elements are requested, i.e. via zero-length argument `indices` or `units` that is not NULL. Previously this case would access all values just like NULL does.
* ROBUSTNESS: Now `readCelRectangle()` gives an informative error message if argument `xrange` or `yrange` is not of length two. ## Bug Fixes * `readPgf()` and `readPgfEnv()` would give an error if argument `indices` was specified as a double rather than as an integer vector. # Version 1.39.0 [2014-10-13] ## Notes * The version number was bumped for the Bioconductor devel version, which is now Bioconductor 3.1 for R (>= 3.2.0). # Version 1.38.0 [2014-10-13] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 3.0 for R (>= 3.1.1). # Version 1.37.2 [2014-09-28] ## Notes * Minor modifications due to the move to GitHub. # Version 1.37.1 [2014-08-25] ## Software Quality * Removed `R CMD check` NOTEs that appeared in recent R versions. # Version 1.37.0 [2014-04-11] ## Notes * The version number was bumped for the Bioconductor devel version, which is now Bioconductor 2.15 for R (>= 3.1.0). # Version 1.36.0 [2014-04-11] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 2.14 for R (>= 3.1.0). # Version 1.35.3 [2014-02-28] ## Notes * Same updates as in release v1.34.2. # Version 1.35.2 [2014-02-28] ## Software Quality * Patches to Fusion SDK based on clang v3.4. # Version 1.35.1 [2014-02-27] ## Notes * Same updates as in release v1.34.1. # Version 1.35.0 [2013-10-14] ## Notes * The version number was bumped for the Bioconductor devel version. # Version 1.34.2 [2014-02-28] ## Code Refactoring * CLEANUP: Removed unnecessary usage of `:::`. # Version 1.34.1 [2014-02-27] ## Bug Fixes * `readCelUnits()` could throw `Error in vector("double", nbrOfCells * nbrOfArrays) : vector size cannot be NA. In addition: Warning message: In nbrOfCells * nbrOfArrays : NAs produced by integer overflow` when reading from a large number of arrays and/or a large number of units.
Previously the limit of `nbrOfCells*nbrOfArrays` was `.Machine$integer.max` (=2147483647), whereas now it is `.Machine$double.xmax` (=1.797693e+308). Thanks to Damian Plichta at the Technical University of Denmark for reporting on this. # Version 1.34.0 [2012-10-14] ## Notes * The version number was bumped for the Bioconductor release version, which is now Bioconductor 2.13 for R (>= 3.0.0). # Version 1.33.4 [2013-09-23] ## Performance * Package now uses `which()` instead of `whichVector()` of **R.utils**. Before R (< 2.11.0), `which()` used to be 10x slower than `whichVector()`, but now it's 3x faster. # Version 1.33.3 [2013-06-29] ## Notes * Same updates as in release v1.32.3. # Version 1.33.2 [2013-05-25] ## Notes * Same updates as in release v1.32.2. # Version 1.33.1 [2013-05-20] ## Notes * Same updates as in release v1.32.1. # Version 1.33.0 [2013-04-03] ## Notes * The version number was bumped for the Bioconductor devel version. # Version 1.32.3 [2013-06-29] ## Bug Fixes * Since **affxparser** 1.30.2/1.31.2 (r72352; 2013-01-08), `writeCdf()` would incorrectly encode the unit types, iff the input `cdf` argument specified them as integers, e.g. as done by `writeCdf()` for `AffyGenePDInfo` in **aroma.affymetrix**. More specifically, the unit type index would be off by one, e.g. an `expression` unit (1) would be encoded as an `unknown` unit (0) and so on. On the other hand, if they were specified by their unit-type names (e.g. 'expression') the encoding should still be correct, e.g. if input is constructed from `readCdf()` of **affxparser**. Thanks to Guido Hooiveld at Wageningen UR (The Netherlands) for reporting on this. * Similarly, `writeCdf()` has "always", at least since **affxparser** 1.7.4 (r21888; 2007-01-09), encoded unit directions and QC unit types incorrectly, iff they were specified as integers. # Version 1.32.2 [2013-05-25] ## Performance * Removed all remaining `gc()` calls. * Replaced all `rm()` calls with NULL assignments.
# Version 1.32.1 [2013-05-20] ## Code Refactoring * CRAN POLICY: Now all Rd `\usage{}` lines are at most 90 characters long. # Version 1.32.0 [2013-04-03] ## Notes * The version number was bumped for the Bioconductor release version. # Version 1.31.4 [2013-03-19] ## Code Refactoring * Made `example(invertMap)` a bit faster so `R CMD check` won't complain. # Version 1.31.3 [2013-03-18] ## Code Refactoring * Internal `isPackageLoaded()` of `findFiles()` no longer uses defunct `manglePackageName()` function. # Version 1.31.2 [2013-01-07] ## Notes * Same updates as in release v1.30.2. # Version 1.31.1 [2012-10-18] ## New Features * Now `compareCdfs()` gives a more precise `reason` attribute when there is a difference in (regular or QC) units. It narrows down the first unit that differs and reports its unit number. # Version 1.31.0 [2012-10-01] ## Notes * The version number was bumped for the Bioconductor devel version. # Version 1.30.2 [2013-01-07] ## Bug Fixes * `writeCdf()` did not encode unit types as decoded by `readCdf()`. Unit type `unknown` was incorrectly encoded such that `readCdf()` would decode it as `copynumber`. Also, unit types `genotypingcontrol` and `expressioncontrol` were not encoded at all. # Version 1.30.0 [2012-10-01] ## Notes * The version number was bumped for the Bioconductor release version. # Version 1.29.13 [2012-09-26] ## New Features * Added argument `cdf=FALSE` to `createCel()`. Note, the previous implementation corresponded to `cdf=TRUE`. ## Software Quality * ROBUSTNESS: Now `createCel()` validates/sets CEL header field `total` based on `cols` and `rows`. * ROBUSTNESS: Added a system test for validating that the package can write and read a CEL. The test spawns another R process so that the test is robust against core dumps. # Version 1.29.12 [2012-09-12] ## Documentation * Updated one Rd link. # Version 1.29.11 [2012-09-01] ## New Features * Added argument `aliases` to `arrangeCelFilesByChipType()`, e.g.
`arrangeCelFilesByChipType(..., aliases=c("Focus"="HG-Focus"))`. ## Bug Fixes * `arrangeCelFilesByChipType(pathnames)` assumed `pathnames` were files in the current directory. # Version 1.29.10 [2012-08-29] ## Code Refactoring * Updated some internal files used solely for maintenance. # Version 1.29.9 [2012-08-29] ## Bug Fixes * The move to Fusion SDK 1.1.2 caused the package to not compile on Windows. # Version 1.29.8 [2012-08-14] ## Significant Changes * Upgraded to Fusion SDK 1.1.2. # Version 1.29.7 [2012-08-14] ## Code Refactoring * Rearranged `patchdir`. # Version 1.29.6 [2012-06-26] # Version 1.29.5 [2012-06-19] ## New Features * Added `arrangeCelFilesByChipType()` for moving CEL files to subdirectories named according to their chip types, which can be useful when for instance downloading GEO data sets. # Version 1.29.4 [2012-06-14] ## New Features * `readPgfEnv(..., indices=NULL)` no longer gives a warning. * Updated the error messages for the CLF and PGF parsers. # Version 1.29.3 [2012-05-22] ## Software Quality * Now system test `tests/testWriteAndReadEmptyCdf.R` generates an error that is detected and reported by `R CMD check`. # Version 1.29.2 [2012-05-22] ## Software Quality * GENERALIZATION: Now system tests that launch another R process no longer assume R is on the OS's search path. ## Code Refactoring * ROBUSTNESS/CRAN POLICY: `readCel()` and `readCelUnits()` are no longer calling `.Internal(qsort(...))`. # Version 1.29.1 [2012-05-18] ## Bug Fixes * Replaced several `throw()` with `stop()`, because the former assumes that **R.methodsS3** is loaded, which it may not be. ## Software Quality * ROBUSTNESS: Added a system test for validating that the package can write and read a CDF. The test spawns another R process so that the test is robust against core dumps. # Version 1.29.0 [2012-03-30] ## Notes * The version number was bumped for the Bioconductor devel version.
# Version 1.28.1 [2012-06-26] # Version 1.28.0 [2012-03-30] ## Notes * The version number was bumped for the Bioconductor 2.10 release version. # Version 1.27.5 [2012-03-19] ## Software Quality * ROBUSTNESS: Now the native code throws R errors, instead of printing an error message to stdout/stderr and then returning null, that is translated to errors at the R level. # Version 1.27.4 [2012-03-05] ## Bug Fixes * **affxparser** would not build on Windows with the new Rtools toolchain (Rtools 2.15.0.1915-1919). # Version 1.27.3 [2011-11-18] ## Software Quality * ROBUSTNESS: Added sanity checks asserting that the internal readers did indeed read something and not just returned NULL. It should be very unlikely that this occurs, but there is still a small risk that after asserting that a file exists, but before the internal Fusion SDK parsers access the file, the file has been removed. # Version 1.27.1 [2011-11-01] ## Notes * Same updates as in v1.26.1. # Version 1.27.0 [2011-10-31] ## Notes * The version number was bumped for the Bioconductor devel version. # Version 1.26.4 [2012-03-06] ## Bug Fixes * **affxparser** would not build on Windows with the new Rtools toolchain (Rtools 2.15.0.1915-1919), which is for R (> 2.14.1), i.e. also for R 2.14.2 (but not 2.14.1). This is the same bug fix that was first done in v1.27.4. # Version 1.26.2 [2011-11-16] ## Notes * The version number was bumped by Bioconductor to trigger a build. # Version 1.26.1 [2011-11-01] ## Bug Fixes * FIX: Fixed warning on `In readBin(con, what = "integer", size = 4, n = 1, signed = FALSE, 'signed = FALSE' is only valid for integers of sizes 1 and 2` that some read methods would generate. # Version 1.26.0 [2011-10-31] ## Notes * The version number was bumped for the Bioconductor 2.9 release version. # Version 1.25.1 [2011-09-27] ## Notes * Maintainer email was updated. # Version 1.25.0 [2011-04-13] ## Notes * The version number was bumped for the Bioconductor devel version.
# Version 1.24.0 [2011-04-13] ## Notes * The version number was bumped for the Bioconductor 2.8 release version. # Version 1.23.3 [2011-02-22] ## Documentation * DOCUMENTATION: Added a section on what it means by setting a new (different) chip type for the output in `convertCel()`. ## Bug Fixes * The internal `.unwrapDatHeaderString()`, used by `convertCel()` among others, would throw `Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header. They became identical: ...` in case the DAT header of the CEL file did not contain all fields. The function has now been updated to be more forgiving and robust so that missing values are returned for such fields instead. # Version 1.23.2 [2011-02-15] ## Documentation * Added a clarification to the help page on 'Cell coordinates and cell indices' that the convention in **affxparser** is to use one-based cell indices, because they are more convenient to use in R. In order to clearly distinguish these from the redundant zero-based index values that also exist in CDF files, an additional section was added on that topic. Moreover, help pages for methods querying CDF files are now referring to the above page, where applicable. Thanks to William Mounts (Pfizer) and Todd Allen for pointing out these ambiguities in the documentation. # Version 1.23.1 [2010-12-16] ## Code Refactoring * ROBUSTNESS: Now `matrix(...)` is used instead of `.Internal(matrix(...))`. # Version 1.23.0 [2010-10-17] ## Notes * The version number was bumped for the Bioconductor devel version. # Version 1.22.0 [2010-10-17] ## Notes * The version number was bumped for the Bioconductor 2.7 release version. # Version 1.21.1 [2010-10-05] ## New Features * Now `readCdfDataFrame()` also returns the cell field `expos`. # Version 1.21.0 [2010-04-22] ## Notes * The version number was bumped for the Bioconductor devel version. # Version 1.20.0 [2010-04-22] ## Notes * The version number was bumped for the Bioconductor 2.6 release version.
# Version 1.19.6 [2010-02-11] ## Bug Fixes * More fixes. # Version 1.19.5 [2010-02-09] ## Bug Fixes * More fixes. # Version 1.19.4 [2010-02-06] ## Bug Fixes * More fixes. # Version 1.19.3 [2010-02-06] ## Bug Fixes * A couple of patches to Fusion SDK courtesy of Brian Ripley; they are needed for Windows (64). # Version 1.19.2 [2010-02-02] ## Bug Fixes * A fix to the Windows build. # Version 1.19.1 [2010-01-29] ## Significant Changes * Updated Fusion SDK to 1.1.0. # Version 1.19.0 [2009-10-27] ## Notes * Devel version bumped because of the new Bioconductor release. # Version 1.18.0 [2009-10-27] ## Notes * Release version bumped because of the new Bioconductor 2.5 release. # Version 1.17.5 [2009-09-22] ## Documentation * Fixed broken link in an Rd file. # Version 1.17.4 [2009-09-21] ## New Features * Added `parseDatHeaderString()`, which in combination with `readCelHeader()` can be used to infer the timestamp in the header of a CEL file. # Version 1.17.3 [2009-05-29] ## New Features * Added `applyCdfGroupFields()` and `cdfSetDimension()`. # Version 1.17.2 [2009-02-20] ## Bug Fixes * `readChp()` would crash (segmentation fault) for (at least) some CHP files for GenomeWideSNP\_5 generated by Affymetrix Power Tools. * Updated `compareCels()` to work with new `readCelHeader()`. # Version 1.17.1 [2009-05-09] ## New Features * Now `readCelHeader()` also reads DAT headers from Calvin CEL files. # Version 1.17.0 [2009-04-20] ## Notes * Devel version bumped because of the new Bioconductor release. # Version 1.16.0 [2009-04-20] ## Notes * Release version bumped because of the new Bioconductor 2.4 release. # Version 1.15.6 [2009-02-20] ## New Features * Added optional argument `newChipType` to `convertCel()` for overriding the default chip type. Useful for updating the formal chip type of old CEL files. ## Code Refactoring * Removed all `gc()` calls in `convertCel()`. 
# Version 1.15.5 [2009-02-14] # Version 1.15.4 [2009-02-10] ## Bug Fixes * `readCcg()` and `readCcgHeader()` no longer give warnings on `truncating string with embedded nul in 'rawToChar()'`. These warnings made no difference, but were annoying. # Version 1.15.3 [2009-01-22] ## Documentation * Fixed a minor Rd problem. # Version 1.15.2 [2008-12-30] ## Notes * Same bug fix as in release version v1.14.2. # Version 1.15.1 [2008-12-04] ## Notes * Same bug fix as in release version v1.14.1. # Version 1.15.0 [2008-10-21] ## Notes * Devel version bumped because of the new Bioconductor release. # Version 1.14.2 [2008-12-30] ## Bug Fixes * `readChp()` would not read all data. Thanks Gabor Csardi for reporting this and providing a patch. # Version 1.14.1 [2008-12-04] ## Bug Fixes * When the CDF file is on a Windows network, that is, has a pathname starting with `//` or `\\`, then the `chiptype` reported by `readCdfHeader()` contains a path component as well. This seems to be due to a bug in Fusion SDK. # Version 1.14.0 [2008-10-21] ## Notes * Release version bumped because of the new Bioconductor 2.3 release. # Version 1.13.8 [2008-08-28] # Version 1.13.7 [2008-08-23] ## Performance * `readCcg()` is substantially faster after removing all `gc()` calls. # Version 1.13.6 [2008-08-21] ## Significant Changes * Updated Fusion SDK from 1.0.10b (Jan 2008) to 1.0.11 (July 2008). # Version 1.13 [2008-08-14] ## Documentation * Fixed typos and incorrect equation in help page '2. Cell coordinates and cell indices'. # Version 1.13.5 [2008-08-09] ## New Features * Made `readCdf()` recognize more unit types. ## Bug Fixes * `writeCdf()` would write `CustomSeq` units as `Tag` units, and vice versa. This means that _ASCII_ CDFs containing such units and converted with `convertCdf()` would have an incorrect unit type for these units. Also, unit type 'Copy Number' is reported as `"copynumber"` and no longer as `"unknown"`.
* The increase of the internal buffer for reading the `refseq` header field of ASCII CDFs that was done in 1.11.2 was mistakenly undone in 1.13.3. # Version 1.13.4 [2008-08-05] ## Documentation * Now `help(createCel)` (and its example) clarifies that the template CEL header can be of v3 (ASCII), v4 (binary;XDA), or v1 (binary;Calvin). ## Code Refactoring * Renamed the `HISTORY` file to `NEWS`. # Version 1.13.3 [2008-05-20] ## Bug Fixes * Now `writeTpmap()` works. # Version 1.13.2 [2008-05-08] ## Notes * Copied all updates in v1.12.2 (release) to v1.13.2 (devel). # Version 1.13.1 [2008-05-02] ## Notes * Copied all updates in v1.12.1 (release) to v1.13.1 (devel). # Version 1.13.0 [2008-04-29] ## Notes * Devel version bumped because of the new Bioconductor release. # Version 1.12.2 [2008-05-09] ## Bug Fixes * **affxparser** 1.12.1 would not build on Windows. Fix by Martin Morgan. # Version 1.12.1 [2008-05-02] ## New Features * Added `readChp()`. Contribution by Robert Gentleman. # Version 1.12.0 [2008-04-29] ## Notes * Release version bumped because of the new Bioconductor 2.2 release. # Version 1.11.13 [2008-04-13] ## Significant Changes * Updated to Fusion SDK v1.0.10b. # Version 1.11 [2008-03-06] ## Bug Fixes * Regular expression pattern `a-Z` is illegal on (at least) some locale, e.g. `C` (where `A-z` works). The only way to specify the ASCII alphabet is to list all characters explicitly, which we now do in all methods of the package. See the r-devel thread "invalid regular expression '[a-Z]'" on 2008-03-05 for details. # Version 1.11.6 [2008-03-04] ## New Features * Added trial versions of `readClf()` and `readPgf()`. # Version 1.11.5 [2008-02-29] ## New Features * Updated `cdfMergeStrands()` to merge any even number of groups, not only units with two or four group pairs. ## Bug Fixes * The code in `findFiles()` for testing if **R.utils** is loaded or not was not correct making it fail to detect **R.utils**. 
# Version 1.11.4 [2008-02-20] ## New Features * Added argument `allFiles = TRUE` to `findFiles()`. * Updated `readCcg()` according to the newer file format specifications. Now it is possible to do low-level reading of copy-number CNCHP files generated by the Affymetrix Genotype Console v2. ## Code Refactoring * Now `findFiles()` and hence `findCdf()` is only utilizing the **R.utils** package if it is already loaded. It will no longer try to load **R.utils**. # Version 1.11.3 [2007-12-01] ## New Features * Removed argument `reorder` from `readCel()` and `readCelUnits()` since its name was misleading (the returned value was identical regardless of `reorder`, but the reading speed was faster when `reorder` was TRUE, which is how it is now hardwired). # Version 1.11.2 [2007-11-06] ## Bug Fixes * Reading a CDF that has a `refseq` header field longer than 65,000 symbols would crash R, e.g. when reading certain CDFs for resequencing chip types. A buffer size internal of Fusion SDK was increased from 65,000 to 400,000 bytes. Thanks Wenyi Wang for reporting this. * Argument `verbose` of `tpmap2bpmap()` was not coerced to integer before being passed to the native code. * The internal `.initializeCdf()`, used when creating new CDFs, had an error message referring to an invalid `qcUnitLengths` when it was supposed to be `unitLengths`. Thanks Elizabeth Purdom for reporting this. ## Code Refactoring * Created a Makefile in `/inst/info` for comparing Fusion SDK with **affxparser**. # Version 1.11.1 [2007-10-12] ## New Features * `convertCel()` will no longer generate a warning if the corresponding CDF file was not found. ## Bug Fixes * For some Calvin CEL files the CEL header does not contain "parent parameter" `affymetrix-dat-header` but only parameter `affymetrix-partial-dat-header`. In that case `convertCel()` would throw an error about `sprintf("DatHeader= %s\n", datHeader)`. Now a "fake" DAT header is created from the partial one.
If neither is found, a slightly more informative exception is thrown. # Version 1.11.0 [2007-10-02] ## Notes * Version bumped because of the new Bioconductor release. # Version 1.9.5 [2007-09-16] ## New Features * Added argument `recursive=TRUE` to `findCdf()`. Note, the current working directory is always scanned first, but never recursively (unless explicitly added to the search path). This is to avoid "endless" scans in case the search path has not been set. * `findFiles()` now does a breadth-first search in lexicographic order. * Removed default search paths `cdf/` and `data/cdf/`. We do not want to enforce a standard path. ## Code Refactoring * Now the examples (as well as test scripts) utilize data available in the new Bioconductor **AffymetrixDataTestFiles** package. This means that `R CMD check` now runs many more tests, which is good. * CLEAN UP: Removed many of the old `testscripts/` scripts. They are now under `tests/`. ## Bug Fixes * `findFiles()` was not robust against broken Unix links. * If the destination file already existed, `convertCel()` would correctly detect that, but would report the name of the source file. # Version 1.9.4 [2007-08-25] ## Significant Changes * Updated to Fusion SDK v1.0.9. # Version 1.9.3 [2007-08-16] ## Notes * See updates made to release v1.8.3 below. * The only difference between v1.9.3 and v1.8.3 is the modification of `findCdf()` in v1.9.2. # Version 1.9.2 [2007-07-27] ## Significant Changes * This Bioconductor devel version requires R v2.6.0 due to a change of how strings are handled in C by Bioconductor. ## New Features * Modified `findCdf()` such that it is possible to set an alternative function for how CDFs are located. # Version 1.8.3 [NA] ## New Features * Made several updates so that **affxparser** better supports CEL files in the new Command Console (Calvin) file format: - `isCelFile()` recognizes Calvin CEL files. - `convertCel()` can convert Calvin CEL files into v4 CEL files.
- `writeCelHeader()` can write v4 CEL headers given Calvin CEL header. ## Code Refactoring * Added internal/private function to read Command Console Generic (CCG) files, also known as Calvin files. Note, these functions are currently _not_ utilizing the Fusion SDK library, but are instead written from scratch based on the Affymetrix' file format definitions. # Version 1.8.2 [2007-08-01] ## Performance * Optimized `writeCdfHeader()` for memory. For a CDF with 1,200,000+ units just writing the unit names would consume 1-1.5 GiB RAM. Now it writes unit names in chunks keeping the memory overhead around 100-200 MiB. * Made `convertCdf()` more memory efficient. ## Bug Fixes * Error message in `isCelFile()` when the file was not found was broken. # Version 1.8.1 [2007-07-26] ## Significant Changes * Now **affxparser** install on OSX with PPC. # Version 1.8.0 [2007-04-24] ## Notes * The version number was bumped up with the Bioconductor release. # Version 1.7.5 [2007-03-08] ## New Features * Added argument `truncateGroupNames` to `readCdfGroupNames()` which defaults to TRUE for backward compatibility. When TRUE, any prefix of group names identical to the unit name will be stripped of the group names. # Version 1.7.4 [2007-02-21] ## New Features * Now `readCelUnits()` can handle unit groups for which there are no probes, e.g. when stratifying on PM in a unit containing only MMs. * Added `writeCdfHeader()`, `writeCdfQcUnits()` and `writeCdfUnits()`. These are all used by `writeCdf()`. They also make it possible to write a CDF in chunks in order to for instance `convertCdf()` in constant memory. # Version 1.7.3 [2007-01-05] ## New Features * Added `cdfAddPlasqTypes()`. * Now `readCdfUnits(..., readDirections=TRUE)` also returns group directions. * Now `readCdf()` reads all unit and group fields by default. * In addition to optimizing IO time, read maps can be used to unrotate CEL data rotated by the dChip software. 
For more information, see help on "Cell-index maps for reading and writing". ## Bug Fixes * Using read maps for `readCel()` would give an error saying the read map is invalid even when it is not. # Version 1.7.2 [2006-12-10] ## New Features * Added argument `isPm` to `readCdf()`. ## Bug Fixes * `readCdfUnits()` and `readCdfCellIndices()` with `stratifyBy="mm"` would return the same as `stratifyBy="pm"`. Options `"pm"` and `"pmmm"` are unaffected by this fix. # Version 1.7.1 [2006-11-03] ## Significant Changes * Updated to Fusion SDK v1.0.8. * Windows build change: The Windows version is building against the Windows code of Fusion SDK not the POSIX code. In order to do this we have had to patch the preprocessor code in several of the Fusion SDK source-code files, which has to be redone manually whenever Fusion is updated. Starting with this version, we instead set the `_MSC_VER` flag used in the Fusion code to indicate Windows (set by the Microsoft Visual C++ compiler). Since we are using MINGW this flag is obviously not set. Faking `_MSC_VER` this way leaves us only having to patch one single file in the Fusion release instead of 10-20. Hopefully there are no other side effects. ## Bug Fixes * In Fusion SDK (v1.0.5) that previous version of **affxparser** used, a CDF file was never closed on Unix platforms (this bug was not in the Windows version). Since Fusion allocated memory for the complete CDF (even if a subset is only read), this caused the memory usage to blow up, when reading the same or different CDF files multiple times, because the memory was never deallocated. Thanks Seth Falcon and Ken Simpson for reporting this problem. # Version 1.7.0 [2006-10-25] ## New Features * Made `readCelUnits()` a bit more clever if a `cdf` structure with only cell indices is passed. Then all fields are just indices and one can call unlist immediately. This speeds things up a bit. ## Bug Fixes * `writeCdf()` would create an invalid CDF file if there were no QC units. 
This would in turn make `readCdfUnits()` etc core dump. * Similar to get bug fix in the C code for `readCelHeader()`, much of the C-level code for CDF (and BPMAP) files assumes that the strings from Fusion SDK have a null terminator. At least for CDF unit names, this is not necessarily the case. To be on the safe side, for all retrieved Fusion SDK strings we now make sure there is a null terminator before converting it into an R string. Thanks to Ken Simpson at WEHI for all the troubleshooting. * Because of the above bug fix, the ASCII mouse exon CDF can now be converted into a valid binary CDF. ## Notes * The devel version number was bumped up with the Bioconductor release. # Version 1.6.0 [2006-10-03] ## Notes * The version number was bumped up with the Bioconductor release. # Version 1.5 [2006-09-21] ## New Features * Added `compareCdfs()` to verify that a converted CDF is correct. * Added `convertCdf()` utilizing the new `writeCdf()`. * Added trial version of `createCel()`. * Added trial version of `updateCelUnits()`. ## Bug Fixes * The C code for `readCelHeader()` did not allocate space for the string null terminator for the header elements that originates from wide C++ string. This caused `readCelHeader()` to contain string elements with random characters at the end. * nrows and ncols were swapped in the CDF header when written by `writeCdf()`. This was missed because all tested CDFs were square. ## Software Quality * Now the package passes `R CMD check` without warnings. # Version 1.5.4 [2006-08-18] ## Significant Changes * Updated Fusion SDK to version 1.0.7. ## Bug Fixes * The new implementation of `updateCel()` utilizing raw vectors was not correct; extra zeros was written too. The example code of `updateCel()` reveals such errors much easier now. * `updateCel()` would in some cases give `Error: subscript out of bounds` when writing the last chunk. 
# Version 1.5.3 [2006-07-24] ## New Features * Added functional prototype of `updateCel()` to _update_ binary (v4) CEL files. Currently, the code does make use the Fusion SDK. There is currently no `writeCel()` to create a CEL file from scratch. However, with the auxillary function `copyCel()` one can copy an existing CEL file and then update that one. Thus, it is now possible to write, say, normalized probe intensities to a CEL file. Note that this is only a first prototype and functions may change in a future release. ## Performance * Improved the speed of `updateCel()` substantially by first working with raw vector in memory and then write binary data to file. Data is also written in chunks (instead of all at once), to minimize the memory overhead of using raw vectors, which is especially important for the larger chips, e.g. 500K. # Version 1.5.2 [2006-05-31] ## Significant Changes * Updated Fusion SDK to version 1.0.6. # Version 1.5.1 [2006-05-15] ## Significant Changes * Updated Fusion SDK to version 1.0.5. ## Bug Fixes * Made small changes to the SDK to allow it to compile under Mac OS X with GCC-4.0.3 shipping with R-2.3.0. ## Code Refactoring * Made changes to the `Makevars`, `_Makefile` and `cmd_line` scripts. # Version 1.5.0 [2006-05-12] ## New Features * Added `cdfOrderBy()` and `cdfOrderColumnsBy()` for restructuring group fields in a CDF list structure. Added `cdfGetGroups()` too. ## Documentation * Cleaned up and restructured the help pages; several Rd pages are now made "internal" so they do not show up on the help index page. Instead they are accessable from within other help pages (if you browsing via HTML that is). Added a help page on common terms. * Added a bit more documentation on how to set the default CDF path. ## Bug Fixes * On Linux 64-bit read CEL intensities would all be zero. This was due to compiler settings in the Fusion SDK package, which is circumvented by gcc compile it with a lower optimization level. 
* When argument `cdf` was a CDF list structure with elements `type` or `direction`, `readCelUnits()` would not read the correct cells because the values of `type` and `direction` would be included in the extracted list of cell indices. # Version 1.4.0 [2006-04-27] ## Notes * The stable version for Bioconductor 1.8. # Version 1.3.3 [2006-04-15] ## Significant Changes * The package now works on Solaris. * Updated the Fusion SDK to version 1.0.5 (an unofficial release). ## New Features * New method `readCdfCellIndices()`, which is a 5-10 times faster special-case implementation of `readCdfUnits()` to read cell indices only. * Renamed `readCdfUnitsMap()` to `readCdfUnitsWriteMap()`. * New method `invertMap()` for fast inversion of maps. ## Performance * Now `readCelUnits()` sorts the cell indices before reading the data from each file. This minimizes the amount of jumping around in the CEL files resulting in a speed-up of about 5-10 times. ## Known Issues * KNOWN BUGS: The weird bug as in v1.3.2 remains with the new Fusion SDK, R v2.3.0 beta (2006-04-10 r37715) on WinXP. Internally `readCdfCellIndices()` replaces `readCdfUnits()`, but the error is still the same. # Version 1.3.2 [2006-03-28] ## Significant Changes * All cell and unit indices are now starting from one and not from zero. This change requires that all code that have been using a previous version of this package have to be updated! ## New Features * New methods `readCelRectangle()` to read probe signals from a specify area of the chip. ## Documentation * Added extensive help on cell coordinates and cell indices as well read and write maps. ## Known Issues * KNOWN BUGS: At least on WinXP, heavy use of `readCelUnits()` will sooner or later core dump R; it seems to be a memory related from that occur when reading the CDF and extracting the name of the unit. However, when "torturing" `readCdfUnits()` the crash won't happen so it might be that `readCel()` does something. Have not tried on other platforms. 
## Performance * Further optimization in speed and memory for most methods. # Version 1.3.1 [NA] affxparser/R/0000755000175200017520000000000014516003651014137 5ustar00biocbuildbiocbuildaffxparser/R/901.Dictionary.R0000644000175200017520000000364014516003651016702 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocDocumentation "1. Dictionary" # # \description{ # This part describes non-obvious terms used in this package. # # \describe{ # \item{affxparser}{The name of this package.} # \item{API}{Application program interface, which describes the # functional interface of underlying methods.} # \item{block}{(aka group).} # \item{BPMAP}{A file format containing information # related to the design of the tiling arrays.} # \item{Calvin}{A special binary file format.} # \item{CDF}{A file format: chip definition file.} # \item{CEL}{A file format: cell intensity file.} # \item{cell}{(aka feature) A probe.} # \item{cell index}{An integer that identifies a probe uniquely.} # \item{chip}{An array.} # \item{chip type}{An identifier specifying a chip design # uniquely, e.g. \code{"Mapping50K_Xba240"}.} # \item{DAT}{A file format: contains pixel intensity # values collected from an Affymetrix GeneArray scanner.} # \item{feature}{A probe.} # \item{Fusion SDK}{Open-source software development kit (SDK) provided # by Affymetrix to access their data files.} # \item{group}{(aka block) # Defines a unique subset of the cells in a unit. # Expression arrays typically only have one group per unit, whereas # SNP arrays have either two or four groups per unit, one for each of # the two allele times possibly repeated for both strands.} # \item{MM}{Mismatch-match, e.g. MM probe.} # \item{PGF}{A file format: probe group file.} # \item{TPMAP}{A file format storing the relationship between (PM,MM) # pairs (or PM probes) and positions on a set of sequences.} # \item{QC}{Quality control, e.g. 
QC probes and QC probe sets.} # \item{unit}{A probeset.} # \item{XDA}{A file format, also known as the binary file format.} # } # } #*/######################################################################### affxparser/R/902.CellCoordinatesAndIndices.R0000644000175200017520000000735514516003651021601 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocDocumentation "2. Cell coordinates and cell indices" # # \description{ # This part describes how Affymetrix \emph{cells}, also known as # \emph{probes} or \emph{features}, are addressed. # } # # \section{Cell coordinates}{ # In Affymetrix data files, cells are uniquely identified by their # \emph{cell coordinates}, i.e. \eqn{(x,y)}. For an array with # \eqn{N*K} cells in \eqn{N} rows and \eqn{K} columns, the \eqn{x} # coordinate is an integer in \eqn{[0,K-1]}, and the \eqn{y} coordinate # is an integer in \eqn{[0,N-1]}. The cell in the upper-left corner has # coordinate \eqn{(x,y)=(0,0)} and the one in the lower-right corner # \eqn{(x,y)=(K-1,N-1)}. # } # # \section{Cell indices and cell-index offsets}{ # To simplify addressing of cells, a coordinate-to-index function is # used so that each cell can be addressed using a single integer instead # (of two). Affymetrix defines the \emph{cell index}, \eqn{i}, of # cell \eqn{(x,y)} as # \deqn{ # i = K*y + x + 1, # } # where one is added to give indices in \eqn{[1,N*K]}. # Continuing, the above definition means that cells are ordered # row by row, that is from left to right and from top to bottom, # starting at the upper-left corner. # For example, with a chip layout \eqn{(N,K)=(1600,1600)} the cell at # \eqn{(x,y)=(0,0)} has index \eqn{i=1}, and the cell at \eqn{(x,y)=(1599,1599)} # has index \eqn{i=2560000}. # A cell at \eqn{(x,y)=(1498,3)} has index \eqn{i=6299}. # # Given the cell index \eqn{i}, the coordinate \eqn{(x,y)} can be # calculated as # \deqn{ # y = floor((i-1)/K) # } # \deqn{ # x = (i-1)-K*y.
# } # Continuing the above example, the coordinate for cell \eqn{i=1} is # found to be \eqn{(x,y)=(0,0)}, for cell \eqn{i=2560000} it is # \eqn{(x,y)=(1599,1599)}, for cell \eqn{i=6299} it is # \eqn{(x,y)=(1498,3)}. # } # # \section{Converting between cell indices and (x,y) coordinates in R}{ # Although not needed to use the methods in this package, to get the # cell indices for the cell coordinates or vice versa, see # \code{\link[affy:xy2indices]{xy2indices}()} and \code{indices2xy()} # in the \bold{affy} package. # } # # \section{Note on the zero-based "index" field of Affymetrix CDF files}{ # An Affymetrix CDF file provides information on which cells should be # grouped together. To identify these groups of cells, the cells # are specified by their (x,y) coordinates, which are stored as # zero-based coordinates in the CDF file. # # All methods of the \pkg{affxparser} package make use of these # (x,y) coordinates, and some methods make it possible to read # them as well. However, it is much more common that the methods # return cell indices \emph{calculated} from the (x,y) coordinates # as explained above. # # In order to conveniently work with cell indices in \R, the # convention in \emph{affxparser} is to use \emph{one-based} # indices. # Hence the addition (and subtraction) of 1:s in the above equations. # This is all taken care of by \pkg{affxparser}. # # Note that, in addition to (x,y) coordinates, a CDF file also contains # a zero-based "index" for each cell. This "index" is redundant to # the (x,y) coordinate and can be calculated analogously to the # above \emph{cell index} while leaving out the addition (subtraction) # of 1:s. # Importantly, since this "index" is redundant (and exists only in # CDF files), we have decided to treat this field as an internal field. # Methods of \pkg{affxparser} neither provide access to nor make # use of this internal field.
# } # # @author "HB" ##*/######################################################################### affxparser/R/909.CellIndexMaps.R0000644000175200017520000001462614516003651017303 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocDocumentation "9. Advanced - Cell-index maps for reading and writing" # # \description{ # This part defines read and write maps that can be used to remap # cell indices before reading and writing data from and to file, # respectively. # # This package provides methods to create read and write (cell-index) # maps from Affymetrix CDF files. These can be used to store the cell # data in an optimal order so that when data is read it is read in # contiguous blocks, which is faster. # # In addition to this, read maps may also be used to read CEL files that # have been "reshuffled" by other software. For instance, the dChip # software (\url{http://www.dchip.org/}) rotates Affymetrix Exon, # Tiling and Mapping 500K data. See example below how to read # such data "unrotated". # # For more details how cell indices are defined, see # @see "2. Cell coordinates and cell indices". # } # # \section{Motivation}{ # When reading data from file, it is faster to read the data in # the order that it is stored compared with, say, in a random order. # The main reason for this is that the read arm of the hard drive # has to move more if data is not read consecutively. Same applies # when writing data to file. The read and write cache of the file # system may compensate a bit for this, but not completely. # # In Affymetrix CEL files, cell data is stored in order of cell indices. # Moreover, (except for a few early chip types) Affymetrix randomizes # the locations of the cells such that cells in the same unit (probeset) # are scattered across the array. # Thus, when reading CEL data arranged by units using for instance # @see "readCelUnits", the order of the cells requested is both random # and scattered. 
# # Since CEL data is often queried unit by unit (except for some # probe-level normalization methods), one can improve the speed of # reading data by saving data such that cells in the same unit are # stored together. A \emph{write map} is used to remap cell indices # to file indices. When later reading that data back, a # \emph{read map} is used to remap file indices to cell indices. # Read and write maps are described next. # } # # \section{Definition of read and write maps}{ # Consider cell indices \eqn{i=1, 2, ..., N*K} and file indices # \eqn{j=1, 2, ..., N*K}. # A \emph{read map} is then a \emph{bijective} (one-to-one) function # \eqn{h()} such that # \deqn{ # i = h(j), # } # and the corresponding \emph{write map} is the inverse function # \eqn{h^{-1}()} such that # \deqn{ # j = h^{-1}(i). # } # Since the mapping is required to be bijective, it holds that # \eqn{i = h(h^{-1}(i))} and that \eqn{j = h^{-1}(h(j))}. # For example, consider the "reversing" read map function # \eqn{h(j)=N*K-j+1}. The write map function is \eqn{h^{-1}(i)=N*K-i+1}. # To verify the bijective property of this map, we see that # \eqn{h(h^{-1}(i)) = h(N*K-i+1) = N*K-(N*K-i+1)+1 = i} as well as # \eqn{h^{-1}(h(j)) = h^{-1}(N*K-j+1) = N*K-(N*K-j+1)+1 = j}. # } # # \section{Read and write maps in R}{ # In this package, read and write maps are represented as @integer # @vectors of length \eqn{N*K} with \emph{unique} elements in # \eqn{\{1,2,...,N*K\}}. # Consider cell and file indices as in previous section. # # For example, the "reversing" read map in previous section can be # represented as # \preformatted{ # readMap <- (N*K):1 # } # Given a @vector \code{j} of file indices, the cell indices are # the obtained as \code{i = readMap[j]}. # The corresponding write map is # \preformatted{ # writeMap <- (N*K):1 # } # and given a @vector \code{i} of cell indices, the file indices are # the obtained as \code{j = writeMap[i]}. 
# # Note also that the bijective property holds for this mapping, that is # \code{i == readMap[writeMap[i]]} and \code{i == writeMap[readMap[i]]} # are both @TRUE. # # Because the mapping is bijective, the write map can be calculated from # the read map by: # \preformatted{ # writeMap <- order(readMap) # } # and vice versa: # \preformatted{ # readMap <- order(writeMap) # } # Note, the @see "invertMap" method is much faster than \code{order()}. # # Since most algorithms for Affymetrix data are based on probeset (unit) # models, it is natural to read data unit by unit. Thus, to optimize the # speed, cells should be stored in contiguous blocks of units. # The methods @see "readCdfUnitsWriteMap" can be used to generate a # \emph{write map} from a CDF file such that if the units are read in # order, @see "readCelUnits" will read the cells data in order. # Example: # \preformatted{ # Find any CDF file # cdfFile <- findCdf() # # # Get the order of cell indices # indices <- readCdfCellIndices(cdfFile) # indices <- unlist(indices, use.names=FALSE) # # # Get an optimal write map for the CDF file # writeMap <- readCdfUnitsWriteMap(cdfFile) # # # Get the read map # readMap <- invertMap(writeMap) # # # Validate correctness # indices2 <- readMap[indices] # == 1, 2, 3, ..., N*K # } # # \emph{Warning}, do not misunderstand this example. It can not be used # improve the reading speed of default CEL files. For this, the data in # the CEL files has to be rearranged (by the corresponding write map). # } # # \section{Reading rotated CEL files}{ # It might be that a CEL file was rotated by another software, e.g. # the dChip software rotates Affymetrix Exon, Tiling and Mapping 500K # arrays 90 degrees clockwise, which remains rotated when exported # as CEL files. To read such data in a non-rotated way, a read # map can be used to "unrotate" the data. 
# applyCdfBlocks():
# Applies a function to one named element (by default "blocks") of every
# unit in a CDF list structure and stores the result back in that element.
#
# Arguments:
#   cdf  - A CDF list structure; each element is a unit (itself a list).
#   fcn  - A function that takes the selected unit element (plus '...')
#          and returns its replacement.
#   ...  - Further arguments passed on to 'fcn'.
#   .key - Name of the unit element to update.
#
# Value:
#   The CDF list structure with the selected element of each unit
#   replaced.  An empty 'cdf' is returned as is.
#
# Note: the position of '.key' is looked up in the first unit only and is
# assumed to be the same in all other units.
applyCdfBlocks <- function(cdf, fcn, ..., .key="blocks") {
  # Validate arguments
  if (!is.list(cdf)) {
    stop("Argument 'cdf' is not a list: ", mode(cdf))
  }
  if (!is.function(fcn)) {
    stop("Argument 'fcn' is not a function: ", mode(fcn))
  }

  # Nothing to do?
  unitCount <- length(cdf)
  if (unitCount == 0) {
    return(cdf)
  }

  # Locate the field once, using the first unit as the template
  fieldPos <- match(.key, names(cdf[[1]]))
  if (is.na(fieldPos)) {
    stop("There is no such element in CDF structure: ", .key)
  }

  # Update the field unit by unit; .subset2() avoids method dispatch
  for (kk in seq_len(unitCount)) {
    current <- .subset2(cdf, kk)
    current[[fieldPos]] <- fcn(.subset2(current, fieldPos), ...)
    cdf[[kk]] <- current
  }

  cdf
}
# applyCdfGroupFields():
# Applies a function to the list of fields of each group in a CDF
# structure.
#
# Arguments:
#   cdf - A CDF list structure.
#   fcn - A function that takes a list of fields (one group) and returns
#         an updated list of fields.
#   ... - Arguments passed on to 'fcn'.
#
# Value:
#   Whatever applyCdfGroups() returns when each unit's groups are mapped
#   through 'fcn', i.e. an updated CDF list structure.
applyCdfGroupFields <- function(cdf, fcn, ...) {
  # Per-unit worker: run 'fcn' on every group of the unit.  The '...' used
  # here are the dots of applyCdfGroupFields() (lexical scoping).
  updateGroups <- function(groups) {
    lapply(groups, FUN=fcn, ...)
  }

  applyCdfGroups(cdf, updateGroups)
} # applyCdfGroupFields()
# applyCdfGroups():
# Applies a function over the groups of every unit in a CDF structure.
#
# Arguments:
#   cdf - A CDF list structure; each element is a unit (itself a list)
#         whose 'groups' element holds the groups of that unit.
#   fcn - A function that takes a list of groups (plus '...') and returns
#         an updated list of groups.
#   ... - Arguments passed on to 'fcn'.
#
# Value:
#   The CDF list structure where the 'groups' element of each unit has
#   been replaced by the value of 'fcn'.
applyCdfGroups <- function(cdf, fcn, ...) {
  # Validate arguments
  if (!is.list(cdf)) {
    stop("Argument 'cdf' is not a list: ", mode(cdf))
  }
  if (!is.function(fcn)) {
    stop("Argument 'fcn' is not a function: ", mode(fcn))
  }

  # Visit the units one by one; .subset2() avoids method dispatch
  for (kk in seq_along(cdf)) {
    current <- .subset2(cdf, kk)
    current$groups <- fcn(.subset2(current, "groups"), ...)
    cdf[[kk]] <- current
  }

  cdf
}
# arrangeCelFilesByChipType():
# Moves CEL files to subdirectories of 'path' that are named after the
# chip type found in each CEL file header (as read by readCelHeader()).
#
# Arguments:
#   pathnames - A character vector of CEL pathnames to be moved.
#   path      - Root output directory that will hold the chip-type
#               subdirectories.
#   aliases   - Optional named character vector of chip-type aliases,
#               e.g. c("Focus"="HG-Focus") treats chip type 'Focus' as
#               'HG-Focus'.
#   ...       - Not used.
#
# Value:
#   Returns (invisibly) a character vector of the new pathnames, one per
#   element of 'pathnames', named by chip type.  Entries are NA for files
#   that do not exist, whose CEL header could not be read, or that
#   file.rename() failed to move.
#
# Requires the R.utils package.
arrangeCelFilesByChipType <- function(pathnames=list.files(pattern="[.](cel|CEL)$"), path="celFiles/", aliases=NULL, ...) {
  requireNamespace("R.utils") || stop("Package not loaded: R.utils")
  Arguments <- R.utils::Arguments
  isFile <- R.utils::isFile
  filePath <- R.utils::filePath

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'path':
  path <- Arguments$getCharacter(path)

  # Argument 'aliases':
  if (!is.null(aliases)) {
    aliases <- Arguments$getCharacters(aliases, useNames=TRUE)
    stopifnot(!is.null(names(aliases)))
  }

  pathnamesD <- rep(NA_character_, times=length(pathnames))
  chipTypes <- rep(NA_character_, times=length(pathnames))

  for (ii in seq_along(pathnames)) {
    pathname <- pathnames[ii]

    # Skip non-existing files
    if (!isFile(pathname)) {
      next
    }

    # Skip files whose header cannot be read (e.g. not a valid CEL file).
    # Such files are reported as NA in the result instead of aborting the
    # whole call halfway through, after other files were already moved.
    hdr <- tryCatch(readCelHeader(pathname), error=function(ex) NULL)
    if (is.null(hdr)) {
      next
    }

    chipType <- hdr$chiptype
    # Without a usable chip type there is no target directory
    if (length(chipType) != 1L || is.na(chipType)) {
      next
    }

    # Rename according to alias?
    if (!is.null(aliases)) {
      # unname() so that the lookup key is not carried along as a name
      alias <- unname(aliases[chipType])
      if (!is.na(alias)) {
        chipType <- alias
      }
    }
    chipTypes[ii] <- chipType

    # Move the file to <path>/<chipType>/<filename>
    filename <- basename(pathname)
    pathD <- filePath(path, chipType)
    pathnameD <- Arguments$getWritablePathname(filename, path=pathD)
    res <- file.rename(from=pathname, to=pathnameD)
    if (res) {
      pathnamesD[ii] <- pathnameD
    }
  } # for (ii ...)

  names(pathnamesD) <- chipTypes

  invisible(pathnamesD)
} # arrangeCelFilesByChipType()
# o BUG FIX: arrangeCelFilesByChipType(pathnames) assumed 'pathnames' # were files in the current directory. # 2012-06-19 # o Created. ############################################################################ affxparser/R/cdfAddBaseMmCounts.R0000644000175200017520000001033014516003651017705 0ustar00biocbuildbiocbuild########################################################################/** # @RdocFunction cdfAddBaseMmCounts # # @title "Adds the number of allele A and allele B mismatching nucleotides of the probes in a CDF structure" # # \description{ # @get "title". # # This @function is designed to be used with @see "applyCdfGroups" # on an Affymetrix Mapping (SNP) CDF @list structure. # # Identifies the number of nucleotides (bases) in probe sequences that # mismatch the target sequence for allele A and the allele B, # as used by [1]. # } # # @synopsis # # \arguments{ # \item{groups}{A @list structure with groups. # Each group must contain the fields \code{tbase}, \code{pbase}, and # \code{offset} (from @see "cdfAddProbeOffsets"). # } # \item{...}{Not used.} # } # # \value{ # Returns a @list structure with the same number of groups as the # \code{groups} argument. To each group, two fields are added: # \item{mmACount}{The number of nucleotides in the probe sequence # that mismatch the target sequence of allele A.} # \item{mmBCount}{The number of nucleotides in the probe sequence # that mismatch the target sequence of allele B.} # } # # \details{ # Note that the above counts can be inferred from the CDF structure alone, # i.e. no sequence information is required. # Consider a probe group interrogating allele A. First, all PM probes # match the allele A target sequence perfectly regardless of shift. # Moreover, all these PM probes mismatch the allele B target sequence # at exactly one position. Second, all MM probes mismatch the # allele A sequence at exactly one position.
# Adds the number of allele A and allele B mismatching nucleotides of the
# probes in a CDF structure.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.
#
# Arguments:
#   groups  A list of groups.  Each group must contain the fields 'tbase',
#           'pbase' and 'offset' (the latter from cdfAddProbeOffsets()).
#   ...     Not used.
#
# Value:
#   The same list of groups where each group has two more integer fields,
#   'mmACount' and 'mmBCount', of the same length/dimension as 'offset'.
#
# Details:
#   Odd-numbered groups are treated as interrogating allele A and
#   even-numbered groups as interrogating allele B.  For a group
#   interrogating allele A: PM probes get mmACount=0 and mmBCount=1;
#   MM probes get mmACount=1 and mmBCount=2, except MM probes with zero
#   offset, which get mmBCount=1.  For allele B groups the roles of A and
#   B are swapped.
cdfAddBaseMmCounts <- function(groups, ...) {
  # Elementwise test of whether a base is a given nucleotide (either case)
  isBase <- function(bases, lower, upper) {
    (bases == lower | bases == upper)
  }

  for (idx in seq_along(groups)) {
    grp <- groups[[idx]]

    # A probe is a PM if its base is the complement of the target base
    target <- grp$tbase
    probe <- grp$pbase
    isPm <- (isBase(target, "a", "A") & isBase(probe, "t", "T")) |
            (isBase(target, "t", "T") & isBase(probe, "a", "A")) |
            (isBase(target, "c", "C") & isBase(probe, "g", "G")) |
            (isBase(target, "g", "G") & isBase(probe, "c", "C"))

    # Probes centered on the SNP position
    isCentered <- (grp$offset == 0)

    # By default every probe has exactly one mismatch towards each allele;
    # keep the shape (vector or array) of the offsets.
    shape <- dim(isCentered)
    if (is.null(shape)) {
      ones <- rep(1L, length(isCentered))
    } else {
      ones <- array(1L, dim=shape, dimnames=dimnames(isCentered))
    }
    countA <- ones
    countB <- ones

    # Shifted MM probes mismatch the non-interrogated allele twice
    isShiftedMm <- (!isPm & !isCentered)

    if (idx %% 2 == 1) {
      # Group interrogates allele A
      countA[isPm] <- 0L
      countB[isShiftedMm] <- 2L
    } else {
      # Group interrogates allele B
      countB[isPm] <- 0L
      countA[isShiftedMm] <- 2L
    }

    # Add the new fields
    grp$mmACount <- countA
    grp$mmBCount <- countB
    groups[[idx]] <- grp
  }

  groups
} # cdfAddBaseMmCounts()
# Adds the PLASQ types for the probes in a CDF structure.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.
#
# Arguments:
#   groups  A list of groups ordered as (A, B) pairs.  Each group must
#           contain the fields 'tbase' and 'pbase', and either 'direction'
#           (compared against 1 for sense) or 'groupdirection' (compared
#           against "sense").
#   ...     Not used.
#
# Value:
#   The same list of groups where each group has the additional fields
#   'isInterrogating' (only for complete A/B pairs) and 'plasqType', a
#   numeric vector with values in [0,15]:
#     PM/MM: For MMs add 0, for PMs add 8.
#     A/B:   For Bs add 0, for As add 4.
#     o/c:   For shifted (o) add 0, for centered (c) add 2.
#     R/F:   For antisense (R) add 0, for sense (F) add 1.
#   Example: (PM,A,c,R) = 8 + 4 + 2 + 0 = 14 (=PMcAR)
cdfAddPlasqTypes <- function(groups, ...) {
  nbrOfGroups <- length(groups)
  nbrOfStrands <- nbrOfGroups %/% 2

  # seq_len() (not 1:n) so that fewer than two groups means zero iterations
  for (kk in seq_len(nbrOfStrands)) {
    groupA <- groups[[2*kk-1]]
    groupB <- groups[[2*kk]]

    # Identify the interrogating probe pair (quartet), that is, where the
    # target bases of allele A and allele B differ.
    isInterrogating <- (groupA$tbase != groupB$tbase)
    groupA$isInterrogating <- groupB$isInterrogating <- isInterrogating

    groups[[2*kk-1]] <- groupA
    groups[[2*kk]] <- groupB
  }

  for (gg in seq_along(groups)) {
    group <- groups[[gg]]

    # PM or MM?  A PM probe base is the complement of the target base.
    tbase <- group$tbase
    pbase <- group$pbase
    isPm <- ((tbase == "a" | tbase == "A") & (pbase == "t" | pbase == "T")) |
            ((tbase == "t" | tbase == "T") & (pbase == "a" | pbase == "A")) |
            ((tbase == "c" | tbase == "C") & (pbase == "g" | pbase == "G")) |
            ((tbase == "g" | tbase == "G") & (pbase == "c" | pbase == "C"))

    # Allele A or B?  Odd groups are A, even groups are B.
    isA <- (gg %% 2 == 1)

    # Sense or antisense?
    fields <- names(group)
    pos <- which("direction" == fields)
    if (length(pos) > 0) {
      # readCdfUnits() structure
      direction <- .subset2(group, pos)
      isSense <- (direction == 1)
    } else {
      # ...otherwise readCdf() structure
      direction <- .subset2(group, "groupdirection")
      isSense <- (direction == "sense")
    }

    # Centered or shifted?
    isCentered <- group$isInterrogating

    group$plasqType <- (8*isPm + 4*isA + 2*isCentered + 1*isSense)
    groups[[gg]] <- group
  }

  groups
} # cdfAddPlasqTypes()


# Adds probe offsets to the groups in a CDF structure.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.
#
# Arguments:
#   groups  A list of groups ordered as (A, B) pairs.  Each group must
#           contain the fields 'tbase' and 'expos'.
#   ...     Not used.
#
# Value:
#   The same list of groups (same number of groups) where each group of a
#   complete A/B pair has the additional fields 'hasInterrogating' and
#   'offset'.
#
# Details:
#   If the pair has an interrogating quartet (target bases differ between
#   allele A and allele B), offsets are relative to the 'expos' of the
#   first such quartet.  Otherwise the offsets are guessed: the first probe
#   is assumed to be shifted -4 positions, unless that would give some
#   probe a zero offset, in which case -3 is tried, and so on up to +4.
cdfAddProbeOffsets <- function(groups, ...) {
  nbrOfGroups <- length(groups)
  nbrOfStrands <- nbrOfGroups %/% 2

  # seq_len() (not 1:n) so that fewer than two groups means zero iterations
  for (kk in seq_len(nbrOfStrands)) {
    groupA <- groups[[2*kk-1]]
    groupB <- groups[[2*kk]]

    # Identify the interrogating probe pair (quartet)
    isInterrogating <- (groupA$tbase != groupB$tbase)

    # Index of the (first) interrogating quartet; NA if there is none
    centerExpos <- which(isInterrogating)[1]

    # Does an interrogating quartet exist?
    hasInterrogating <- !is.na(centerExpos)
    if (hasInterrogating) {
      # Yes: Calculate (true) offsets
      offsetA <- groupA$expos - groupA$expos[centerExpos]
      offsetB <- groupB$expos - groupB$expos[centerExpos]
    } else {
      # No: Guess offsets relative to the first probe pair.
      offsetA <- groupA$expos - groupA$expos[1]
      offsetB <- groupB$expos - groupB$expos[1]
      # Pick the first shift in -4..+4 for which no probe ends up with a
      # zero offset (a zero offset would imply an interrogating quartet).
      # If none qualifies, the last candidate (+4) is used.
      for (shift in -4:+4) {
        if (!any(offsetA == -shift, na.rm=TRUE))
          break
      }
      offsetA <- offsetA + shift
      offsetB <- offsetB + shift
    }

    # Add inferred knowledge
    groupA$hasInterrogating <- groupB$hasInterrogating <- hasInterrogating
    groupA$offset <- offsetA
    groupB$offset <- offsetB

    groups[[2*kk-1]] <- groupA
    groups[[2*kk]] <- groupB
  }

  groups
} # cdfAddProbeOffsets()
# Gets a subset of the fields of each group in a CDF structure.
#
# Designed to be used with applyCdfGroups().
#
# Arguments:
#   groups  A list of groups.
#   fields  A character vector of names of fields to be returned.
#   ...     Not used.
#
# Value:
#   A list of groups, each holding only the requested fields.
#
# Details:
#   The fields are extracted with .subset(), so no error is generated for
#   missing fields, which keeps the extraction fast.
cdfGetFields <- function(groups, fields, ...) {
  lapply(groups, FUN=.subset, fields)
} # cdfGetFields()


# Gets a subset of the groups in a CDF structure.
#
# Designed to be used with applyCdfGroups().
#
# Arguments:
#   groups  A list of groups.
#   which   An integer or character vector of groups to be returned.
#   ...     Not used.
#
# Value:
#   A list holding the selected groups.
cdfGetGroups <- function(groups, which, ...) {
  selected <- .subset(groups, which)
  selected
} # cdfGetGroups()
# Imitates Affymetrix' gtype_cel_to_pq software.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.
#
# Arguments:
#   groups  A list of groups.
#   ...     Passed on to cdfMergeStrands() and cdfMergeToQuartets().
#
# Value:
#   If there are fewer than two groups, 'groups' is returned as is.
#   Otherwise the strands are merged with cdfMergeStrands(), the alleles
#   are stacked with cdfMergeToQuartets(), the result is named "quartets",
#   and every field of its first group is turned into a plain vector.
cdfGtypeCelToPQ <- function(groups, ...) {
  # Nothing to merge unless there are at least two groups
  if (length(groups) < 2) {
    return(groups)
  }

  # Merge the groups of the same allele across strands, then stack
  # allele A and allele B.
  quartets <- cdfMergeToQuartets(cdfMergeStrands(groups, ...), ...)

  # Now there is only one group.  Rename it to "quartets"
  names(quartets) <- "quartets"

  # Vectorize each field
  quartets[[1]] <- lapply(quartets[[1]], FUN=as.vector)

  quartets
} # cdfGtypeCelToPQ()
# Creates a valid CEL header from a CDF header.
#
# Arguments:
#   cdfHeader   A CDF header list; the fields 'cols', 'rows' and 'chiptype'
#               are used.
#   sampleName  The name of the sample to be added to the CEL header.
#   date        The (scan) date to be added to the CEL header.
#   ...         Not used.
#   version     The file-format version of the generated CEL file.
#               Currently only version "4" is supported.
#
# Value:
#   A CEL header list with the elements 'version', 'cols', 'rows', 'total',
#   'chiptype', 'algorithm', 'parameters', 'header', 'cellmargin',
#   'noutliers' and 'nmasked'.
cdfHeaderToCelHeader <- function(cdfHeader, sampleName="noname", date=Sys.time(), ..., version="4") {
  # Argument 'version':
  version <- match.arg(version)

  nbrOfCols <- cdfHeader$cols
  nbrOfRows <- cdfHeader$rows

  celHeader <- list()

  # File format version
  celHeader$version <- version

  # Chip dimensions
  celHeader$cols <- nbrOfCols
  celHeader$rows <- nbrOfRows
  celHeader$total <- nbrOfRows * nbrOfCols

  # Chip type
  celHeader$chiptype <- cdfHeader$chiptype

  # Algorithm and its parameters; the latter records which package
  # (and version) created the header.
  celHeader$algorithm <- "NoAlgorithm"
  pkgInfo <- packageDescription("affxparser")
  celHeader$parameters <- sprintf("Creator:%s;Version:%s;", pkgInfo$Package, pkgInfo$Version)

  # DAT header embedded in the CEL v3 header
  datFields <- list(
    pixelRange="[0..65535]",
    sampleName=sampleName,
    CLS=nbrOfCols,
    RWS=nbrOfRows,
    XIN=0,
    YIN=0,
    VE=0,
    scanTemp="",
    laserPower="",
    scanDate=format(date, "%m/%d/%y %H:%M:%S"),
    scanner=list(id="", type=""),
    chipType=celHeader$chiptype
  )
  datHeader <- .wrapDatHeader(datFields)

  # CEL v3 header
  celHeader$header <- sprintf("Cols=%d\nRows=%d\nTotalX=%d\nTotalY=%d\nOffsetX=0\nOffsetY=0\nGridCornerUL=0 0\nGridCornerUR=%d 0\nGridCornerLR=%d %d\nGridCornerLL=0 %d\nAxis-invertX=0\nAxisInvertY=0\nswapXY=0\nDatHeader=%s\nAlgorithm=%s\nAlgorithmParameters=%s\n", nbrOfCols, nbrOfRows, nbrOfCols, nbrOfRows, nbrOfCols, nbrOfCols, nbrOfRows, nbrOfRows, datHeader, celHeader$algorithm, celHeader$parameters)

  # Extras
  celHeader$cellmargin <- 2
  celHeader$noutliers <- 1
  celHeader$nmasked <- 1

  celHeader
} # cdfHeaderToCelHeader()
# Joins CDF allele A and allele B groups strand by strand.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.
#
# Arguments:
#   groups            A list of groups ordered as (A, B) pairs, one pair
#                     per strand.
#   compReverseBases  If TRUE, the names of groups 3 and 4, which typically
#                     are names for bases, are turned into their
#                     complementary bases.
#   collapse          The character string used to collapse the allele A
#                     and the allele B group names.
#   ...               Not used.
#
# Value:
#   A list with one merged group per complete (A, B) pair.  With one or two
#   pairs, the merged groups are named by the collapsed names of the groups
#   they were built from.  If there is no complete pair (fewer than two
#   groups), an empty list is returned.
#
# Details:
#   Vector fields of allele A and allele B are stacked into a two-row
#   matrix with rownames "A" and "B".  Matrix fields are stacked row-wise
#   with "A" and "B" appended to their rownames.  Fields of higher
#   dimension are simply concatenated.
cdfMergeAlleles <- function(groups, compReverseBases=FALSE, collapse="", ...) {
  # Stacks one field of allele A on top of the same field of allele B
  stackAlleles <- function(fieldA, fieldB) {
    ndim <- length(dim(fieldA))
    if (ndim <= 1) {
      # If empty or a vector, stack them into a matrix.
      stacked <- rbind(fieldA, fieldB)
      rownames(stacked) <- c("A", "B")
    } else if (ndim == 2) {
      # If a matrix, stack into a new matrix.
      rownames <- c(paste(rownames(fieldA), "A", sep=""),
                    paste(rownames(fieldB), "B", sep=""))
      stacked <- rbind(fieldA, fieldB)
      rownames(stacked) <- rownames
    } else {
      # Otherwise, just append the values.
      stacked <- c(fieldA, fieldB)
    }
    stacked
  } # stackAlleles()

  nbrOfGroups <- length(groups)

  # Allocate the new groups
  nbrOfStrands <- nbrOfGroups %/% 2
  newGroups <- vector("list", nbrOfStrands)

  # Without a complete allele A/B pair there is nothing to merge (and
  # no names to assign).
  if (nbrOfStrands == 0) {
    return(newGroups)
  }

  groupNames <- names(groups)
  for (kk in seq_len(nbrOfStrands)) {
    kk2 <- 2*kk
    groupA <- .subset2(groups, kk2-1)
    groupB <- .subset2(groups, kk2)

    # Join the fields of allele A and allele B.
    nbrOfFields <- length(groupA)
    newGroup <- vector("list", nbrOfFields)
    for (ff in seq_len(nbrOfFields)) {
      newGroup[[ff]] <- stackAlleles(.subset2(groupA, ff), .subset2(groupB, ff))
    }

    # Set the name of the fields
    names(newGroup) <- names(groupA)

    newGroups[[kk]] <- newGroup
  }

  if (compReverseBases) {
    groupNames[c(3,4)] <- c(A="T", C="G", G="C", T="A")[groupNames[c(3,4)]]
  }

  nameFwd <- paste(groupNames[c(1,2)], collapse=collapse)
  if (nbrOfStrands == 2) {
    nameRev <- paste(groupNames[c(3,4)], collapse=collapse)
    names(newGroups) <- c(nameFwd, nameRev)
  } else {
    names(newGroups) <- nameFwd
  }

  newGroups
} # cdfMergeAlleles()
# Joins CDF groups with the same names.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.  It can be used to join the sense and anti-sense
# groups of the same allele in SNP arrays.
#
# Arguments:
#   groups  A named list of groups.
#   ...     Not used.
#
# Value:
#   If the number of groups is odd, 'groups' is returned as is.  Otherwise
#   a list with one group per unique group name is returned.
#
# Details:
#   For each unique name, the fields of the second, third, ... group with
#   that name are appended, field by field, to the fields of the first
#   group with that name: matrices are joined column-wise and everything
#   else is concatenated.
cdfMergeStrands <- function(groups, ...) {
  # Units with an odd number of groups are left untouched
  if (length(groups) %% 2 != 0) {
    return(groups)
  }

  groupNames <- names(groups)
  merged <- list()

  # For each allele...
  for (name in unique(groupNames)) {
    members <- which(name == groupNames)
    group <- .subset2(groups, members[1])
    nbrOfFields <- length(group)

    if (nbrOfFields > 0) {
      fieldIdxs <- seq_len(nbrOfFields)

      # Append the remaining groups with the same name, one by one
      for (member in members[-1]) {
        groupNext <- .subset2(groups, member)

        # For each field...
        for (ff in fieldIdxs) {
          fields <- .subset2(group, ff)
          fieldsNext <- .subset2(groupNext, ff)
          if (length(dim(fields)) == 2) {
            fields <- cbind(fields, fieldsNext)
          } else {
            # Vectors, and (which should never occur for a normal CDF
            # structure) arrays of higher dimension.
            fields <- c(fields, fieldsNext)
          }
          group[[ff]] <- fields
        }
      }
    }

    merged[[name]] <- group
  } # for (name ...)

  merged
} # cdfMergeStrands()
# Re-arranges CDF groups values in quartets.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.  Note, this requires that the group values have
# already been arranged in PMs and MMs.
#
# Arguments:
#   groups  A list of groups ordered as (A, B) pairs.
#   ...     Not used.
#
# Value:
#   A list with one merged group per complete (A, B) pair, named
#   "forward", "reverse", "forward", ... in that order.  If there is no
#   complete pair (fewer than two groups), an empty list is returned.
#
# Details:
#   Vector fields of allele A and allele B are stacked into a two-row
#   matrix with rownames "A" and "B".  Matrix fields are stacked row-wise
#   with "A" and "B" appended to their rownames.  Fields of higher
#   dimension are simply concatenated.
cdfMergeToQuartets <- function(groups, ...) {
  nbrOfGroups <- length(groups)

  # Allocate the new groups
  nbrOfStrands <- nbrOfGroups %/% 2
  newGroups <- vector("list", nbrOfStrands)

  # seq_len() (not 1:n) so that fewer than two groups means zero iterations
  for (kk in seq_len(nbrOfStrands)) {
    groupA <- groups[[2*kk-1]]
    groupB <- groups[[2*kk]]

    # Allocate the fields
    nbrOfFields <- length(groupA)
    newGroup <- vector("list", nbrOfFields)

    # Join the fields of allele A and allele B.
    for (ff in seq_len(nbrOfFields)) {
      fieldA <- groupA[[ff]]
      fieldB <- groupB[[ff]]
      ndim <- length(dim(fieldA))
      if (ndim <= 1) {
        # If empty or a vector, stack them into a matrix.
        fieldA <- rbind(fieldA, fieldB)
        rownames(fieldA) <- c("A", "B")
      } else if (ndim == 2) {
        # If a matrix, stack into a new matrix.
        rownames <- c(paste(rownames(fieldA), "A", sep=""),
                      paste(rownames(fieldB), "B", sep=""))
        fieldA <- rbind(fieldA, fieldB)
        rownames(fieldA) <- rownames
      } else {
        # Otherwise, just append the values.
        fieldA <- c(fieldA, fieldB)
      }
      newGroup[[ff]] <- fieldA
    }

    names(newGroup) <- names(groupA)
    newGroups[[kk]] <- newGroup
  }

  names(newGroups) <- rep(c("forward", "reverse"), length.out=nbrOfStrands)

  newGroups
} # cdfMergeToQuartets()


# Orders the fields according to the value of another field in the same
# CDF group.
#
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.
#
# Arguments:
#   groups  A list of groups.
#   field   The field whose values are used to order the other fields.
#   ...     Optional arguments passed to base::order().
#
# Value:
#   A list of groups where the elements of every field in a group have
#   been reordered by the ordering of that group's 'field'.
cdfOrderBy <- function(groups, field, ...) {
  lapply(groups, function(group) {
    o <- order(.subset2(group, field), ...)
    lapply(group, FUN=.subset, o)
  })
} # cdfOrderBy()
# Orders the columns of fields according to the values in a certain row of
# another field in the same CDF group.
#
# Note that this requires that the group fields are matrices.
# Designed to be used with applyCdfGroups() on an Affymetrix Mapping (SNP)
# CDF list structure.
#
# Arguments:
#   groups  A list of groups.
#   field   The field whose values in row 'row' are used to order the
#           columns of the other fields.
#   row     The row of the above field to be used to find the order.
#   ...     Optional arguments passed to base::order().
#
# Value:
#   A list of groups where the columns of every field with at least one
#   row have been reordered; fields with zero rows are returned as is.
cdfOrderColumnsBy <- function(groups, field, row=1, ...) {
  reorderGroup <- function(group) {
    # Column order given by the requested row of the key field
    keys <- .subset2(group, field)[row,]
    colOrder <- order(keys, ...)

    lapply(group, FUN=function(values) {
      nbrOfRows <- .subset(dim(values), 1)
      if (!(nbrOfRows > 0)) {
        return(values)
      }
      .subset(values, seq_len(nbrOfRows), colOrder)
    })
  } # reorderGroup()

  lapply(groups, FUN=reorderGroup)
} # cdfOrderColumnsBy()
# Sets the dimension of an object.
#
# Designed to be used with applyCdfGroupFields().
#
# Arguments:
#   field  The object (typically a vector) whose dimension should be set.
#   dim    A numeric vector specifying the dimension.  At most one element
#          may be NA, in which case that extent is inferred from the
#          length of 'field' and the other extents.
#   ...    Not used.
#
# Value:
#   Returns 'field' with its dimension set, if the length of 'field' is
#   compatible with 'dim'.  Otherwise 'field' is returned unchanged; this
#   includes the case where the unknown extent cannot be inferred because
#   one of the known extents is zero.
#
# An error is thrown if more than one element of 'dim' is NA.
cdfSetDimension <- function(field, dim, ...) {
  n <- length(field)

  unknown <- which(is.na(dim))
  if (length(unknown) > 1) {
    stop("Cannot infer dimension. Only one of the dimension can be unknown: ", paste(dim, collapse="x"))
  }

  if (length(unknown) == 1) {
    # Infer the unknown extent from the length and the known extents
    inferred <- n / prod(dim[-unknown])
    # Not finite (NaN/Inf) when a known extent is zero; then there is
    # nothing sensible to infer and the field is left as is.
    if (is.finite(inferred) && inferred %% 1 == 0) {
      dim[unknown] <- inferred
      dim(field) <- dim
    }
  } else if (n == prod(dim)) {
    dim(field) <- dim
  }

  field
} # cdfSetDimension()
# Compares the contents of two CDF files.
#
# Arguments:
#   pathname  The pathname of the first CDF file.
#   other     The pathname of the second CDF file.
#   quick     If TRUE, only the first chunk of units is compared,
#             otherwise all units are compared.
#   verbose   An integer.  The larger the more details are printed.
#   ...       Not used.
#
# Value:
#   Returns TRUE if the two CDFs are equal, otherwise FALSE.  If FALSE,
#   the attribute 'reason' contains a string explaining what difference
#   was detected, the attribute 'units' (for differences among units) the
#   unit indices where the difference was found, and the attributes
#   'value1' and 'value2' the two objects/values that differ.
#
# Details:
#   The headers are compared first, then the QC units (in chunks of 10)
#   and finally the regular units (in chunks of 500), so that only one
#   chunk per file is held at a time regardless of the size of the CDFs.
#   An error is thrown if either file does not exist.
compareCdfs <- function(pathname, other, quick=FALSE, verbose=0, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Bisects two lists of unit values to locate the first unit that
  # differs.  Returns NULL if it cannot be narrowed down.
  differentUnit <- function(value1, value2, units) {
    n <- length(units)

    # Done?
    if (n == 0) return(NULL)

    # Cannot narrow down?
    if (length(value1) != n || length(value2) != n) return(NULL)

    # Compare
    if (n == 1) {
      if (identical(all.equal(value1, value2), TRUE)) {
        return(NULL)
      }
      return(units)
    }

    # Look among the first half, then among the second half
    half <- floor(n/2)
    for (part in list(seq_len(half), (half+1):n)) {
      unit <- differentUnit(value1[part], value2[part], units=units[part])
      if (!is.null(unit)) return(unit)
    }

    NULL
  } # differentUnit()

  # Creates the FALSE return value annotated with what differs
  different <- function(fmtstr, ..., units=NULL, value1=NULL, value2=NULL) {
    res <- FALSE
    attr(res, "reason") <- sprintf(fmtstr, ...)
    attr(res, "units") <- units
    attr(res, "value1") <- value1
    attr(res, "value2") <- value2
    res
  } # different()

  # Reports a difference detected in the chunk 'uu' of units, narrowed
  # down to a single unit if possible.
  reportDifference <- function(label, v1, v2, uu) {
    badUnit <- differentUnit(value1=v1, value2=v2, units=uu)
    if (!is.null(badUnit)) {
      msg <- sprintf("Detected (at least one) %s that differ: %d", label, badUnit)
      idx <- match(badUnit, uu)
      different("%s", msg, units=badUnit, value1=v1[idx], value2=v2[idx])
    } else {
      # Could not narrow it down; report the whole chunk that was compared
      msg <- sprintf("Detected (at least one) %s that differ among units %d to %d", label, min(uu), max(uu))
      different("%s", msg, units=uu, value1=v1, value2=v2)
    }
  } # reportDifference()


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'pathname':
  # Expand any '~' in the pathname.
  pathname <- file.path(dirname(pathname), basename(pathname))
  if (!file.exists(pathname)) {
    stop("Cannot compare CDFs. File not found: ", pathname)
  }

  # Argument 'other':
  # Expand any '~' in the pathname.
  other <- file.path(dirname(other), basename(other))
  if (!file.exists(other)) {
    stop("Cannot compare CDFs. File not found: ", other)
  }

  # Argument 'verbose':
  verbose <- as.integer(verbose)

  if (verbose >= 1) {
    cat("Comparing CDFs...\n")
    cat("  CDF 1: ", pathname, "\n", sep="")
    cat("  CDF 2: ", other, "\n", sep="")
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Compare headers
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose >= 1)
    cat("  Comparing CDF headers...\n")
  h1 <- readCdfHeader(pathname)
  h2 <- readCdfHeader(other)
  for (ff in c("nrows", "ncols", "nunits", "nqcunits", "refseq")) {
    if (!identical(h1[[ff]], h2[[ff]]))
      return(different("%s: %s != %s", ff, h1[[ff]], h2[[ff]]))
  }
  if (verbose >= 1)
    cat("  Comparing CDF headers...done\n")

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Compare QC units
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose >= 1)
    cat("  Comparing QC units...\n")
  units <- seq_len(h1$nqcunits)
  while (length(units) > 0) {
    head <- seq_len(min(length(units), 10))
    uu <- units[head]
    units <- units[-head]
    v1 <- readCdfQc(pathname, units=uu)
    v2 <- readCdfQc(other, units=uu)
    if (!identical(all.equal(v1, v2), TRUE)) {
      return(reportDifference("QC unit", v1, v2, uu))
    }
    v1 <- v2 <- uu <- head <- NULL  # Not needed anymore
  }
  if (verbose >= 1)
    cat("  Comparing QC units...done\n")

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Compare units
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose >= 1) {
    cat("  Comparing units...\n")
    if (quick)
      cat("    Quick mode. Will only check a subset of the units...done\n")
  }
  if (verbose >= 2)
    cat("    Progress: ")
  units <- seq_len(h1$nunits)
  count <- 0
  while (length(units) > 0) {
    head <- seq_len(min(length(units), 500))
    if (verbose >= 2)
      cat(sprintf("%d%%, ", as.integer(100*count/h1$nunits)))
    uu <- units[head]
    units <- units[-head]
    v1 <- readCdf(pathname, units=uu)
    v2 <- readCdf(other, units=uu)
    if (!identical(all.equal(v1, v2), TRUE)) {
      return(reportDifference("unit", v1, v2, uu))
    }
    count <- count + length(uu)
    # In quick mode only the first chunk is compared
    if (quick)
      break
    v1 <- v2 <- uu <- head <- NULL  # Not needed anymore
  }
  if (verbose >= 2)
    cat("100%.\n")
  if (verbose >= 1)
    cat("  Comparing units...done\n")

  if (verbose >= 1)
    cat("Comparing CDFs...done\n")

  TRUE
} # compareCdfs()
# HISTORY:
# 2012-10-18
# o Now compareCdfs() gives a more precise 'reason' attribute when there
#   is a difference in (regular or QC) units.  It narrows down the first
#   unit that differs and reports it unit number.
# 2006-09-10
# o Added argument 'quick' to check only a subset of the units.
# 2006-09-09
# o Created.
############################################################################ affxparser/R/compareCels.R0000644000175200017520000000754614516003651016533 0ustar00biocbuildbiocbuild#########################################################################/**
# @RdocFunction compareCels
#
# @title "Compares the contents of two CEL files"
#
# @synopsis
#
# \description{
#   @get "title".
# }
#
# \arguments{
#   \item{pathname}{The pathname of the first CEL file.}
#   \item{other}{The pathname of the second CEL file.}
#   \item{readMap}{An optional read map for the first CEL file.}
#   \item{otherReadMap}{An optional read map for the second CEL file.}
#   \item{verbose}{An @integer. The larger the more details are printed.}
#   \item{...}{Not used.}
# }
#
# \value{
#   Returns @TRUE if the two CELs are equal.  If a header field differs,
#   @FALSE is returned and the attribute \code{reason} contains a string
#   explaining what difference was detected, and the attributes
#   \code{value1} and \code{value2} contain the two values that differ.
#   If one of the data fields (intensities, stdvs or pixels) differs,
#   an error is thrown.
# }
#
# @author "HB"
#
# \seealso{
#   @see "convertCel".
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
compareCels <- function(pathname, other, readMap=NULL, otherReadMap=NULL, verbose=0, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Creates the FALSE return value carrying the details as attributes
  different <- function(fmtstr, ..., value1=NULL, value2=NULL) {
    res <- FALSE
    attr(res, "reason") <- sprintf(fmtstr, ...)
    attr(res, "value1") <- value1
    attr(res, "value2") <- value2
    res
  }

  # Renders a header value as one string, also when it is NULL or does
  # not have exactly one element.
  asLabel <- function(x) {
    if (is.null(x)) return("NULL")
    paste(as.character(x), collapse=", ")
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'pathname':
  # Expand any '~' in the pathname.
  pathname <- file.path(dirname(pathname), basename(pathname))
  if (!file.exists(pathname)) {
    stop("Cannot compare CELs. File not found: ", pathname)
  }

  # Argument 'other':
  # Expand any '~' in the pathname.
  other <- file.path(dirname(other), basename(other))
  if (!file.exists(other)) {
    stop("Cannot compare CELs. File not found: ", other)
  }

  # Argument 'verbose':
  verbose <- as.integer(verbose)

  if (verbose >= 1) {
    cat("Comparing CELs...\n")
    cat(" CEL 1: ", pathname, "\n", sep="")
    cat(" CEL 2: ", other, "\n", sep="")
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read both CEL files
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose) cat("Reading first...\n")
  cel1 <- readCel(pathname, readHeader=TRUE, readIntensities=TRUE, readStdvs=TRUE, readPixels=TRUE, readOutliers=FALSE, readMasked=FALSE, readMap=readMap)
  if (verbose) cat("Reading first...done\n")

  if (verbose) cat("Reading second...\n")
  cel2 <- readCel(other, readHeader=TRUE, readIntensities=TRUE, readStdvs=TRUE, readPixels=TRUE, readOutliers=FALSE, readMasked=FALSE, readMap=otherReadMap)
  if (verbose) cat("Reading second...done\n")


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Compare headers
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose >= 1) cat(" Comparing CEL headers...\n")
  # Header fields that are not compared
  excl <- c("filename", "version", "header", "datheader", "librarypackage")
  for (ff in setdiff(names(cel1$header), excl)) {
    h1 <- cel1$header[[ff]]
    h2 <- cel2$header[[ff]]
    if (!identical(h1, h2)) {
      # 'h1' and 'h2' already are the field values; report and return them
      return(different("%s: %s != %s", ff, asLabel(h1), asLabel(h2),
                       value1=h1, value2=h2))
    }
  }
  if (verbose >= 1) cat(" Comparing CEL headers...done\n")


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Compare data
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (verbose >= 1) cat(" Comparing CEL data...\n")
  for (ff in c("intensities", "stdvs", "pixels")) {
    v1 <- cel1[[ff]]
    v2 <- cel2[[ff]]
    if (!identical(all.equal(v1, v2), TRUE)) {
      # A data difference is signalled as an error (not as a FALSE value);
      # convertCel() calls this function without inspecting its value.
      stop("Validation of new CEL file failed. Field differ: ", ff)
    }
  }
  if (verbose >= 1) cat(" Comparing CEL data...done\n")

  if (verbose >= 1) cat("Comparing CELs...done\n")

  TRUE
} # compareCels()

############################################################################
# HISTORY:
# 2012-05-18
# o Now using stop() instead of throw().
# 2007-01-03
# o Created from compareCdfs.R.
############################################################################ affxparser/R/convertCdf.R0000644000175200017520000001536314516003651016367 0ustar00biocbuildbiocbuild#########################################################################/**
# @RdocFunction convertCdf
#
# @title "Converts a CDF into the same CDF but with another format"
#
# @synopsis
#
# \description{
#   @get "title".
#   Currently only CDF files in version 4 (binary/XDA) can be written.
#   However, any input format is recognized.
# }
#
# \arguments{
#   \item{filename}{The pathname of the original CDF file.}
#   \item{outFilename}{The pathname of the destination CDF file.
#     If the same as the source file, an exception is thrown.}
#   \item{version}{The version of the output file format.}
#   \item{force}{If @FALSE, and the version of the original CDF is the
#     same as the output version, the new CDF will not be generated,
#     otherwise it will.}
#   \item{...}{Not used.}
#   \item{.validate}{If @TRUE, a consistency test between the generated
#     and the original CDF is performed.  Note that the memory overhead
#     for this can be quite large, because two complete CDF structures
#     are kept in memory at the same time.}
#   \item{verbose}{If @TRUE, extra details are written while processing.}
# }
#
# \value{
#   Returns (invisibly) @TRUE if a new CDF was generated, otherwise @FALSE.
# }
#
# \section{Benchmarking of ASCII and binary CDFs}{
#   Binary CDFs are much faster to read than ASCII CDFs.
#   Here are some
#   example for reading complete CDFs (the difference is even larger when
#   reading CDFs in subsets):
#   \itemize{
#     \item HG-U133A (22283 units): ASCII 11.7s (9.3x), binary 1.20s (1x).
#     \item Hu6800 (7129 units): ASCII 3.5s (6.1x), binary 0.57s (1x).
#   }
# }
#
# \section{Confirmed conversions to binary (XDA) CDFs}{
#   The following chip types have been converted using \code{convertCdf()}
#   and then verified for correctness using \code{compareCdfs()}:
#   ASCII-to-binary: HG-U133A, Hu6800.
#   Binary-to-binary: Test3.
# }
#
# @examples "../incl/convertCdf.Rex"
#
# @author "HB"
#
# \seealso{
#   See @see "compareCdfs" to compare two CDF files.
#   @see "writeCdf".
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
convertCdf <- function(filename, outFilename, version="4", force=FALSE, ..., .validate=TRUE, verbose=FALSE) {
  # Emits a progress message, but only in verbose mode
  say <- function(...) {
    if (verbose) cat(...)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Source: expand any '~' in the pathname; the file must exist
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("Cannot open CDF file. File does not exist: ", filename)
  }

  # Destination: expand any '~'; must not be the source file itself
  outFilename <- file.path(dirname(outFilename), basename(outFilename))
  if (identical(outFilename, filename)) {
    stop("Cannot convert CDF file. Destination is identical the the source pathname: ", filename)
  }

  # Only the binary (XDA) format, i.e. version 4, can be written
  version <- as.character(version)
  if (version != "4") {
    stop("Cannot convert CDF. Currently only version 4 (binary/XDA) can be written: ", version)
  }

  # NOTE: 'force' is coerced here, but it is not used further below,
  # because the short-circuit on the file-format version is disabled.
  force <- as.logical(force)
  .validate <- as.logical(.validate)
  verbose <- as.integer(verbose)


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read source CDF
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  say("Reading CDF header...\n")
  cdfHeader <- readCdfHeader(filename)
#  if (version == cdfHeader$version) {
#    if (!force) {
#      warning("The original CDF file is already in file-format version ",
#                                                  version, ": ", filename);
#      if (verbose)
#        cat("Reading CDF header...done\n");
#      return(invisible(FALSE));
#    }
#  }
  say("Reading CDF header...done\n")

  say("Reading CDF QC units...\n")
  cdfQcUnits <- readCdfQc(filename)
  say("Reading CDF QC units...done\n")

  say("Reading CDF units...\n")
  cdfUnits <- readCdf(filename)
  say("Reading CDF units...done\n")


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Write new CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  say("Writing CDF structure...\n")
  # The writer is one verbose level less chatty, but never below zero
  verbose2 <- if (verbose >= 1) verbose - 1 else 0
  t <- system.time({
    writeCdf(outFilename, cdfheader=cdfHeader, cdf=cdfUnits, cdfqc=cdfQcUnits, overwrite=TRUE, verbose=verbose2)
  })
  if (verbose) {
    cat("Timing for writeCdf():\n")
    print(t)
  }
  say("Writing CDF structure...done\n")


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (.validate) {
    say("Comparing CDFs...\n")
    res <- compareCdfs(filename, outFilename, verbose=verbose)
    if (!res) {
      stop("An inconsistency between source and destination CDF was detected. Reason: ", attr(res, "reason"))
    }
    say("Comparing CDFs...done\n")
  }

  invisible(TRUE)
} # convertCdf()

############################################################################
# HISTORY:
# 2007-07-26
# o Removed debug assignment 'res2 <<- res' before validation error message.
# 2006-09-09
# o More benchmarking: A binary-to-binary convertion of Mapping50K_Hind240
#   (56.0Mb; 57299 units) takes in total 25 mins.  It is the writing of
#   units that is slow.
# o Simple benchmarking: On Thinkpad A31 1Gb RAM; HG-U133A.CDF (22283 units)
#   ASCII (40.3Mb) -> XDA (10.8Mb): ~15s to read ASCII, ~270s to write XDA,
#   and ~420s to validate.
# o Now validate is making use of compareCdfs().
# 2006-09-08
# o Added protection against overwriting the source file.
# 2006-09-07 /HB
# o Ken Simpson at WEHI just tried to convert an ASCII Exon CDF to a binary
#   CDF using convertCdf().  The ASCII CDF is ???MB, and the binary one
#   became 321MB.  He did this on a 4-core with 16GB RAM Linux system, and
#   it took 45 minutes, which probably includes the validation tests,
#   which eventually made R run out of memory.  I've updated the code a bit
#   to make the validation a little bit more memory efficient.  It is
#   probably better to compare chunks of units and not all of them at the
#   same time.  The writeCdf() function can be made faster if writing to
#   a raw vector internally which is the dumped to file.  This should be
#   done in chunks to also optimize memory usage.
# o Created.
############################################################################ affxparser/R/convertCel.R0000644000175200017520000001661414516003651016376 0ustar00biocbuildbiocbuild#########################################################################/**
# @RdocFunction convertCel
#
# @title "Converts a CEL into the same CEL but with another format"
#
# @synopsis
#
# \description{
#   @get "title".
#   Currently only CEL files in version 4 (binary/XDA) can be written.
#   However, any input format is recognized.
# }
#
# \arguments{
#   \item{filename}{The pathname of the original CEL file.}
#   \item{outFilename}{The pathname of the destination CEL file.
#     If the same as the source file, or if the destination file
#     already exists, an exception is thrown.}
#   \item{readMap}{An optional read map for the input CEL file.}
#   \item{writeMap}{An optional write map for the output CEL file.}
#   \item{version}{The version of the output file format.}
#   \item{newChipType}{(Only for advanced users who fully understand
#     the Affymetrix CEL file format!)
#     An optional string for overriding the chip type (label)
#     in the CEL file header.}
#   \item{...}{Not used.}
#   \item{.validate}{If @TRUE, a consistency test between the generated
#     and the original CEL is performed.}
#   \item{verbose}{If @TRUE, extra details are written while processing.}
# }
#
# \value{
#   Returns (invisibly) the pathname of the generated CEL file.
# }
#
# \section{Benchmarking of ASCII and binary CELs}{
#   Binary CELs are much faster to read than ASCII CELs.  Here are some
#   examples for reading complete CELs (the difference is even larger when
#   reading CELs in subsets):
#   \itemize{
#     \item To do
#   }
# }
#
# \section{WARNING: Changing the chip type label}{
#   The \code{newChipType} argument changes the label in the
#   part of DAT header that specifies the chip type of the
#   CEL file.  Note that it does not change anything else in
#   the CEL file.  This type of relabeling is valid for updating
#   the chip type \emph{label} of CEL files that were generated
#   during, say, an "Early Access" period leading to a different
#   chip type label than what more recent CEL files of the same
#   physical chip type have.
# }
#
# @examples "../incl/convertCel.Rex"
#
# @author "HB"
#
# \seealso{
#   @see "createCel".
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
convertCel <- function(filename, outFilename, readMap=NULL, writeMap=NULL, version="4", newChipType=NULL, ..., .validate=FALSE, verbose=FALSE) {
  # Emits a progress message, but only in verbose mode
  say <- function(...) {
    if (verbose) cat(...)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Source: expand any '~' in the pathname; the file must exist
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("Cannot open CEL file. File does not exist: ", filename)
  }

  # Destination: expand any '~'; must differ from the source and not exist
  outFilename <- file.path(dirname(outFilename), basename(outFilename))
  if (identical(outFilename, filename)) {
    stop("Cannot convert CEL file. Destination is identical the the source pathname: ", filename)
  }
  if (file.exists(outFilename)) {
    stop("Cannot convert CEL file. Destination file already exists: ", outFilename)
  }

  # Only the binary (XDA) format, i.e. version 4, can be written
  version <- as.character(version)
  if (version != "4") {
    stop("Cannot convert CEL. Currently only version 4 (binary/XDA) can be written: ", version)
  }

  # An overriding chip type, if given, must be a non-empty string
  if (!is.null(newChipType)) {
    newChipType <- as.character(newChipType)
    if (nchar(newChipType) == 0) {
      stop("Argument 'newChipType' cannot be an empty string.")
    }
  }

  # Helpers called below are one verbose level less chatty (never below 0)
  verbose <- as.integer(verbose)
  verbose2 <- if (verbose >= 1) verbose - 1 else 0


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read source CEL
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  say("Reading CEL file...\n")
  cel <- readCel(filename, readHeader=TRUE, readXY=TRUE, readIntensities=TRUE, readStdvs=TRUE, readPixels=TRUE, readOutliers=FALSE, readMasked=FALSE, readMap=readMap)
  say("Reading CEL file...done\n")


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Changing chip type?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  hdr <- cel$header
  if (!is.null(newChipType)) {
    say("Updating the chip type label from '", hdr$chiptype, "' to '", newChipType, "'.\n", sep="")

    # Replace "<old chip type>.1sq" by "<new chip type>.1sq" in those of
    # the raw header strings that exist
    oldTag <- sprintf("%s.1sq", hdr$chiptype)
    newTag <- sprintf("%s.1sq", newChipType)
    for (field in c("header", "datheader")) {
      if (!is.null(hdr[[field]])) {
        hdr[[field]] <- gsub(oldTag, newTag, hdr[[field]], fixed=TRUE)
      }
    }

    # Updating chip type field (this is actually read only, because
    # the chip type is always inferred from the v3 header).
    hdr$chiptype <- newChipType
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Creating new CEL file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  say("Creating empty CEL file...\n")
  # createCel() will generate a warning if the CDF file could not be
  # located, but that is all right.
  pathname <- suppressWarnings(
    createCel(outFilename, header=hdr, overwrite=FALSE, verbose=verbose2)
  )
  hdr <- NULL  # Not needed anymore
  say("Creating empty CEL file...done\n")


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Update destination CEL file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  say("Updating CEL file...\n")
  updateCel(outFilename, intensities=cel, verbose=verbose2, writeMap=writeMap)
  cel <- NULL  # Not needed anymore
  say("Updating CEL file...done\n")


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (.validate) {
    say("Validating CEL file...\n")
    # The new file is read back through the inverse of the write map
    otherReadMap <- if (is.null(writeMap)) NULL else invertMap(writeMap)
    compareCels(filename, outFilename, readMap=readMap, otherReadMap=otherReadMap, verbose=verbose)
    say("Validating CEL file...done\n")
  }

  invisible(pathname)
} # convertCel()

############################################################################
# HISTORY:
# 2009-02-20
# o Removed all gc() in convertCel().
# o Added optional argument 'newChipType' to convertCel() for overriding
#   the default chip type.
# 2007-09-12
# o Help page was refering to the "CDF" and not the "CEL" files.
# o Now convertCel() will not give a warning if the CDF file is not found.
# 2007-08-28
# o BUG FIX: If the destination file already existed, convertCel() would
#   correctly detect that, but would give the name of the source file.
# 2007-08-16
# o Now createCel() coerces the CEL header to version 4, so writeCel()
#   should be more generic.
# 2007-03-28
# o Memory optimization; removing non-needed objects asap and calls gc().
# o There was non-used variables in convertCel().
# 2007-01-04
# o Creates identical output according to validateCels().
# o Added 'readMap' and 'writeMap' arguments.
# 2007-01-03
# o Created.
############################################################################ affxparser/R/copyCel.R0000644000175200017520000000325014516003651015660 0ustar00biocbuildbiocbuild########################################################################/**
# @RdocFunction copyCel
#
# @title "Copies a CEL file"
#
# \description{
#   @get "title".
#
#   The file must be a valid CEL file, if not an exception is thrown.
# }
#
# @synopsis
#
# \arguments{
#   \item{from}{The filename of the CEL file to be copied.}
#   \item{to}{The filename of destination file.}
#   \item{overwrite}{If @FALSE and the destination file already exists,
#     an exception is thrown, otherwise not.}
#   \item{...}{Not used.}
# }
#
# \value{
#   Return @TRUE if file was successfully copied, otherwise @FALSE.
# }
#
# \seealso{
#   @see "isCelFile".
# }
#
# @author "HB"
#
# @keyword programming
# @keyword file
# @keyword IO
# @keyword internal
#**/#######################################################################
copyCel <- function(from, to, overwrite=FALSE, ...) {
  # The source file must exist...
  if (!file.exists(from)) {
    stop("Cannot copy CEL file. File not found: ", from)
  }

  # ...an existing destination is only replaced on request...
  if (file.exists(to)) {
    if (!overwrite) {
      stop("Cannot copy CEL file. File already exists: ", to)
    }
  }

  # ...and the source must be recognized as a CEL file.
  if (!isCelFile(from)) {
    stop("Cannot copy CEL file. Source file is not a valid CEL file: ", from)
  }

  # The value of file.copy() tells whether the copy succeeded or not
  copied <- file.copy(from, to, overwrite=overwrite)
  copied
}

############################################################################
# HISTORY:
# 2006-07-10
# o Added check with isCelFile().
# 2006-06-22
# o Created.
############################################################################ affxparser/R/createCel.R0000644000175200017520000002030714516003651016153 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction createCel # # @title "Creates an empty CEL file" # # @synopsis # # \description{ # @get "title". # } # # \arguments{ # \item{filename}{The filename of the CEL file to be created.} # \item{header}{A @list structure describing the CEL header, similar # to the structure returned by @see "readCelHeader". This header # can be of any CEL header version.} # \item{overwrite}{If @FALSE and the file already exists, an exception # is thrown, otherwise the file is created.} # \item{nsubgrids}{The number of subgrids.} # \item{...}{Not used.} # \item{cdf}{(optional) The pathname of a CDF file for the CEL file # to be created. If given, the CEL header (argument \code{header}) # is validated against the CDF header, otherwise not. # If @TRUE, a CDF file is located automatically based using # \code{findCdf(header$chiptype)}. # } # \item{verbose}{An @integer specifying how much verbose details are # outputted.} # } # # \value{ # Returns (invisibly) the pathname of the file created. # } # # \details{ # Currently only binary (v4) CEL files are supported. # The current version of the method does not make use of the Fusion SDK, # but its own code to create the CEL file. # } # # \section{Redundant fields in the CEL header}{ # There are a few redundant fields in the CEL header. To make sure # the CEL header is consistent, redundant fields are cleared and # regenerated. For instance, the field for the total number of cells # is calculated from the number of cell rows and columns. 
# }
#
# @examples "../incl/createCel.Rex"
#
# @author "HB"
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
createCel <- function(filename, header, nsubgrids=0, overwrite=FALSE, ..., cdf=NULL, verbose=FALSE) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Writes 'data' as 4-byte little-endian integers
  writeInteger <- function(con, data, ...) {
    writeBin(con=con, as.integer(data), size=4, endian="little")
  }

  # Reserves 'nbytes' bytes from the current file position by seeking to
  # the last of them and writing a single zero byte there.  Uses the
  # connection 'con' opened further below.
  reserveBytes <- function(nbytes) {
    seek(con=con, origin="current", where=nbytes-1)
    writeBin(con=con, as.integer(0), size=1, endian="little")
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # An existing file is only replaced if 'overwrite' is TRUE
  overwrite <- as.logical(overwrite)
  if (file.exists(filename) && !overwrite) {
    stop("Cannot create CEL file. File already exists: ", filename)
  }

  # The header must be a list structure
  if (!is.list(header)) {
    stop("Argument 'header' is not a list: ", mode(header))
  }

  # The number of subgrids cannot be negative
  nsubgrids <- as.integer(nsubgrids)
  if (nsubgrids < 0) {
    stop("Argument 'nsubgrids' is negative: ", nsubgrids)
  }

  # 'cdf' is either a pathname to an existing file, a logical, or NULL
  if (is.character(cdf)) {
    if (!file.exists(cdf)) {
      stop("Cannot compare to CDF file. File not found: ", cdf)
    }
  } else if (!is.logical(cdf) && !is.null(cdf)) {
    stop("Invalid type of argument 'cdf': ", mode(cdf))
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Infer CEL version and coerce the header to version 4
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  version <- .getCelHeaderVersion(header)
  if (version == "1") {
    if (verbose) cat("Coercing CEL header to v4...\n")
    header <- .getCelHeaderV4(header)
    if (verbose) cat("Coercing CEL header to v4...done\n")
  } else if (version == "3") {
    header$version <- "4"
  }

  # Anything that is not version 4 at this point cannot be written
  if (header$version != "4") {
    stop("Failed create binary (XDA) CEL v4 file. Header object has a different version: ", header$version)
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate/assign CEL header field 'total'
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  ncells <- header$cols*header$rows
  if (is.null(header$total)) {
    header$total <- ncells
  } else {
    stopifnot(header$total == ncells)
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Check CEL header against CDF?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (isTRUE(cdf)) {
    # Locate the CDF from the chip type.  This might take a long time.
    cdf <- findCdf(header$chiptype)
    if (is.null(cdf)) {
      warning("Skipping validation of CEL header against CDF header. Could not find a CDF file for this chip type: ", header$chiptype)
    }
  }

  if (is.character(cdf)) {
    # A mismatch in the number of cells is reported as a warning only
    cdfHeader <- readCdfHeader(cdf)
    cdfTotal <- cdfHeader$ncols * cdfHeader$nrows
    if (ncells != cdfTotal) {
      warning("The number of cells in the CEL file does not match that of the CDF file: ", ncells, " != ", cdfTotal)
    }
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Open the file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  con <- file(filename, open="w+b")
  on.exit(close(con))


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Write CEL header
  #
  # The function takes care of redundant fields, unwrapping & wrapping...
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  writeCelHeader(con=con, header, verbose=verbose)


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # "Number of sub-grids."
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  writeInteger(con=con, nsubgrids)


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Reserve space for the data sections
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Cell entries: (float, float, short) = 4+4+2=10 bytes/cell
  if (ncells > 0) {
    reserveBytes(10*ncells)
  }

  # "Masked entries - this consists of the XY coordinates of
  # those cells masked by the user. (short, short)" = 4 bytes/cells
  if (header$nmasked > 0) {
    reserveBytes(4*header$nmasked)
  }

  # "Outlier entries - this consists of the XY coordinates of those
  # cells called outliers by the software. (short, short)" = 4 bytes/cells
  if (header$noutliers > 0) {
    reserveBytes(4*header$noutliers)
  }

  # Sub-grid entries: (integer, integer, float, float, float, float,
  # float, float, float, float, integer, integer, integer, integer);
  # 64 bytes are reserved per subgrid.
  # NOTE(review): 6 integers + 8 floats of 4 bytes each add up to
  # 14*4 = 56 bytes, not 64.  The size is left as is here; confirm it
  # against the CEL v4 file format specification.
  if (nsubgrids > 0) {
    reserveBytes(64*nsubgrids)
  }

  invisible(filename)
} # createCel()

############################################################################
# HISTORY:
# 2012-09-26
# o Added argument 'cdf=NULL' to createCel().  Note, the previous
#   implementation corresponded to cdf=TRUE.
# o ROBUSTNESS: Now createCel() validates/sets CEL header field 'total'
#   based on 'cols' and 'rows'.
# 2007-08-16
# o Updated createCel() so it coerces the CEL header to version 4.
# 2006-09-07
# o Added Rdoc comments.
# o Added a small check against the CDF file, if it exists.
# o Managed to create the first CEL v4 file.  The tricky part is to create
#   a valid CEL v4 header, because there are quite a bit of redundant
#   fields in there, which now are regenerated from the other fields to
#   make sure they are consistent.
# 2006-09-03
# o Created.
############################################################################ affxparser/R/findCdf.R0000644000175200017520000001141614516003651015622 0ustar00biocbuildbiocbuild########################################################################/**
# @RdocFunction findCdf
#
# @title "Search for CDF files in multiple directories"
#
# \description{
#   @get "title".
# }
#
# @synopsis
#
# \arguments{
#   \item{chipType}{A @character string of the chip type to search for.}
#   \item{paths}{A @character @vector of paths to be searched.
#     The current directory is always searched at the beginning.
#     If @NULL, default paths are searched.  For more details, see below.}
#   \item{recursive}{If @TRUE, directories are searched recursively.}
#   \item{pattern}{A regular expression file name pattern to match.}
#   \item{...}{Additional arguments passed to @see "findFiles".}
# }
#
# \value{
#   Returns a @vector of the full pathnames of the files found.
# }
#
# \details{
#   Note, the current directory is always searched first, but never
#   recursively (unless it is added to the search path explicitly).
#   This provides an easy way to override other files in the search path.
#
#   If \code{paths} is @NULL, then a set of default paths are searched.
#   The default search path constitutes:
#   \enumerate{
#     \item \code{getOption("AFFX_CDF_PATH")}
#     \item \code{Sys.getenv("AFFX_CDF_PATH")}
#   }
#
#   One of the easiest ways to set system variables for \R is to
#   set them in an \code{.Renviron} file, e.g.
#   \preformatted{
#     # affxparser: Set default CDF path
#     AFFX_CDF_PATH=${AFFX_CDF_PATH};M:/Affymetrix_2004-100k_trios/cdf
#     AFFX_CDF_PATH=${AFFX_CDF_PATH};M:/Affymetrix_2005-500k_data/cdf
#   }
#   See @see "base::Startup" for more details.
# }
#
# @examples "../incl/findCdf.Rex"
#
# \seealso{
#   This method is used internally by @see "readCelUnits" if the CDF
#   file is not specified.
# }
#
# @author "HB"
#
# @keyword file
# @keyword IO
#**/#######################################################################
findCdf <- function(chipType=NULL, paths=NULL, recursive=TRUE, pattern="[.](c|C)(d|D)(f|F)$", ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Customized search method(s)?
  #
  # Option 'affxparser.settings' may provide one function, or a list of
  # functions, in its element methods$findCdf.  They are tried in order
  # with the arguments passed as is; the first non-NULL result is returned.
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  hooks <- getOption("affxparser.settings")$methods$findCdf
  if (!is.null(hooks)) {
    if (!is.list(hooks)) {
      hooks <- list(hooks)
    }
    for (hook in hooks) {
      found <- hook(chipType=chipType, paths=paths, recursive=recursive, pattern=pattern, ...)
      if (!is.null(found)) {
        return(found)
      }
    }
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Setup search path and pattern
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Default search path: R option first, then the environment variable
  if (is.null(paths)) {
    paths <- c(getOption("AFFX_CDF_PATH"), Sys.getenv("AFFX_CDF_PATH"))
  }
  # Each element may hold several semicolon-separated paths
  paths <- unlist(strsplit(paths, split=";"), use.names=FALSE)

  if (!is.null(chipType)) {
    # The chip type is prepended to the filename pattern, which is why it
    # should be given without a '.cdf' suffix
    hasSuffix <- (regexpr("[.](c|C)(d|D)(f|F)$", chipType) != -1)
    if (hasSuffix) {
      warning("Argument 'chipType' of findCdf() has suffix '.cdf':", chipType)
    }
    pattern <- paste(chipType, pattern, sep="")
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Search
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # 1. First, search the current directory (never recursively)
  found <- findFiles(pattern=pattern, paths=".", recursive=FALSE, ...)
  if (!is.null(found)) {
    return(found)
  }

  # 2. Then, scan all of the search path
  findFiles(pattern=pattern, paths=paths, recursive=recursive, ...)
}

############################################################################
# HISTORY:
# 2007-08-27
# o The current path is never scanned recursively (unless explicitly
#   specified in the search path).  This is to avoid endless scans in case
#   the search path has not been set.
# o Added argument 'recursive=TRUE' to findCdf().
# 2007-02-12 [moved to affxparser 2007-03-28]
# o Added option 'affxparser.settings' (for now private), allowing for an
#   alternative search function to be set it element methods$findCdf.
#   This is utilized by the aroma.affymetrix package to look for CDF
#   using a more formalized hierarchical directory structure.
# 2006-09-21
# o findCdf() now gives a warning if a *.cdf extension is given, but tries
#   not to resolve it.
# 2006-09-15
# o The current directory "." was not the first directory scanned as
#   documented.  Also added cdf/ and data/cdf to the beginning and not
#   at the end of the search path.
# 2006-03-14
# o Now making use of findFiles(), which makes this function much shorter.
# o Migrated the code to work without R.utils too.
# o Added system environment variable and option to the default path.
# 2006-02-22
# o Created.
############################################################################ affxparser/R/findFiles.R0000644000175200017520000001745714516003651016203 0ustar00biocbuildbiocbuild########################################################################/**
# @RdocFunction findFiles
#
# @title "Finds one or several files in multiple directories"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#  \item{pattern}{A regular expression file name pattern to match.}
#  \item{paths}{A @character @vector of paths to be searched.}
#  \item{recursive}{If @TRUE, the directory structure is searched
#    breadth-first, in lexicographic order.}
#  \item{firstOnly}{If @TRUE, the method returns as soon as a matching
#    file is found, otherwise not.}
#  \item{allFiles}{If @FALSE, files and directories starting with
#    a period will be skipped, otherwise not.}
#  \item{...}{Arguments passed to @see "base::list.files".}
# }
#
# \value{
#  Returns a @vector of the full pathnames of the files found.
# }
#
# \section{Paths}{
#   The \code{paths} argument may also contain paths specified as
#   semi-colon (\code{";"}) separated paths, e.g.
#   \code{"/usr/;usr/bin/;.;"}.
# }
#
# \section{Windows Shortcut links}{
#   If package \pkg{R.utils} is available and loaded, Windows Shortcut links (*.lnk)
#   are recognized and can be used to imitate links to directories
#   elsewhere.  For more details, see @see "R.utils::filePath".
# }
#
# @author "HB"
#
# @keyword file
# @keyword IO
# @keyword internal
#**/#######################################################################
findFiles <- function(pattern=NULL, paths=NULL, recursive=FALSE, firstOnly=TRUE, allFiles=TRUE, ...) {
  # To please R CMD check
  filePath <- NULL; rm(list="filePath")

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Splits "path1; path2;path3" into separate, trimmed, non-empty paths.
  # Returns NULL if nothing remains.
  splitPaths <- function(paths, ...) {
    if (length(paths) == 0) {
      return(NULL)
    }
    paths <- unlist(strsplit(paths, split=";"))
    paths <- gsub("[ \t]*$", "", gsub("^[ \t]*", "", paths))
    paths <- paths[nchar(paths) > 0]
    if (length(paths) == 0) {
      return(NULL)
    }
    paths
  } # splitPaths()

  # Checks if a package is loaded or not (cut'n'paste from R.utils)
  isPackageLoaded <- function(package, version=NULL, ...) {
    s <- search()
    if (is.null(version)) {
      s <- sub("_[0-9.-]*", "", s)
    } else {
      package <- paste(package, version, sep="_")
    }
    pattern <- sprintf("package:%s", package)
    (pattern %in% s)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'paths': (defaults to the current directory)
  paths <- splitPaths(paths)
  if (is.null(paths)) {
    paths <- "."
  }

  # Argument 'pattern':
  if (!is.null(pattern)) {
    pattern <- as.character(pattern)
  }

  # Argument 'recursive':
  recursive <- as.logical(recursive)

  # Argument 'firstOnly':
  firstOnly <- as.logical(firstOnly)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Prepare list of paths to be scanned
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  hasRutilsLoaded <- isPackageLoaded("R.utils")
  ## hasRutils <- suppressWarnings(require(R.utils, quietly=TRUE));

  # Don't search the same path twice
  paths <- unique(paths)

  # Don't search non-existing paths
  for (kk in seq_along(paths)) {
    path <- paths[kk]

    # Expand any '~':s
    path <- file.path(dirname(path), basename(path))
    path <- gsub("^[.][/\\]", "", path)

    # Follow Windows shortcut links?
    if (hasRutilsLoaded) {
      path <- filePath(path, expandLinks="any")
    }

    # Does the path exist and is it a directory
    # Note, isdir is TRUE for directories, FALSE for files,
    # *and* NA for non-existing files, e.g. items found by
    # list.files() but are broken Unix links.
    isDirectory <- identical(file.info(path)$isdir, TRUE)
    if (!file.exists(path) || !isDirectory) {
      path <- NA
    }

    paths[kk] <- path
  }
  if (length(paths) > 0) {
    paths <- paths[!is.na(paths)]
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Search for files
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  pathnames <- c()
  for (path in paths) {
    files <- list.files(path, all.files=allFiles, full.names=TRUE)

    # Exclude listings that are neither files nor directories
    files <- gsub("^[.][/\\]", "", files)
    files <- files[nchar(files) > 0]
    if (length(files) > 0) {
      excl <- (basename(files) %in% c(".", "..", "/", "\\"))
      files <- files[!excl]
    }

    # Nothing to do?
    if (length(files) == 0) {
      next
    }

    # Expand Windows shortcut links?  Remember the non-expanded names,
    # because the filename pattern is matched against those.
    files0 <- files
    if (hasRutilsLoaded) {
      files <- vapply(files, FUN=filePath, FUN.VALUE=character(1),
                      expandLinks="any", USE.NAMES=FALSE)
    }

    # Keep only existing and accessible files and directories.
    # Note, isdir is TRUE for directories, FALSE for files,
    # *and* NA for non-existing files, e.g. items found by
    # list.files() but are broken Unix links.
    isDir <- file.info(files)$isdir
    ok <- (file.exists(files) & !is.na(isDir))
    files <- files[ok]
    files0 <- files0[ok]
    isDir <- isDir[ok]

    # Nothing to do?
    if (length(files) == 0) {
      next
    }

    # First search the files, then the directories, so...
    dirs <- files[isDir]
    files <- files[!isDir]
    files0 <- files0[!isDir]

    # Keep only files that match the filename pattern
    # of the non-expanded filename.
    if (!is.null(pattern)) {
      keep <- grep(pattern, basename(files0))
      files <- files[keep]
    }

    if (length(files) > 0) {
      # Files in lexicographic order
      files <- sort(files)
      if (firstOnly) {
        return(files[1])
      }

      # Store results
      pathnames <- c(pathnames, files)
    }

    # Search directories recursively, in lexicographic order?
    if (recursive) {
      for (dir in sort(dirs)) {
        # 'allFiles' must be forwarded explicitly; otherwise hidden files
        # in subdirectories would be returned even with allFiles=FALSE.
        files <- findFiles(pattern=pattern, paths=dir, recursive=recursive,
                           firstOnly=firstOnly, allFiles=allFiles, ...)
        if (length(files) > 0 && firstOnly) {
          return(files[1])
        }
        pathnames <- c(pathnames, files)
      }
    }
  } # for (path ...)

  pathnames
} # findFiles()

############################################################################
# HISTORY:
# 2013-03-18 [HB]
# o Internal isPackageLoaded() no longer uses defunct manglePackageName().
# 2008-02-21 [HB]
# o Added an internal generic isPackageLoaded() function.
# 2008-02-20 [KH]
# o Replaced require("R.utils") with a "isLoaded()" feature.
# 2008-02-14
# o Added argument 'allFiles=TRUE' to findFiles().
# 2007-09-17
# o ROBUSTNESS: Now findFiles() are robust against broken Unix links.
# 2007-08-30
# o BUG FIX: Pattern matching was done on expanded filenames, whereas they
#   should really be done on the non-expanded ones.  This, only applies to
#   Windows shortcuts, but it is not the destination file that is of
#   interest, but the name of the shortcut file.
# o BUG FIX: The recent update was not grep():ing correctly; forgot to
#   extract the basename().
# 2007-08-27
# o Now findFiles(..., recursive=TRUE) does a breath-first search in
#   lexicographic order.
# o Now findFiles() don't search replicated directories.
# 2006-11-01
# o Removed usage of R.utils for now.
# 2006-03-14
# o Created from findCdf.R.
############################################################################ affxparser/R/invertMap.R0000644000175200017520000000350214516003651016227 0ustar00biocbuildbiocbuild#########################################################################/**
# @RdocFunction invertMap
#
# @title "Inverts a read or a write map"
#
# @synopsis
#
# \description{
#   @get "title".
# }
#
# \arguments{
#   \item{map}{An @integer @vector.}
#   \item{...}{Not used.}
# }
#
# \value{
#   Returns an @integer @vector.
# }
#
# \details{
#   A map is defined to be a @vector of \emph{n} with unique finite
#   values in \eqn{[1,n]}.  Finding the inverse of a map is the same as
#   finding the rank of each element, cf. @see "base::order".  However,
#   this method is much faster, because it utilizes the fact that all
#   values are unique and in \eqn{[1,n]}.  Moreover, for any map it holds
#   that taking the inverse twice will result in the same map.
# }
#
# @examples "../incl/invertMap.Rex"
#
# @author "HB"
#
# \seealso{
#   To generate an optimized write map for a CDF file, see
#   @see "readCdfUnitsWriteMap".
# }
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
invertMap <- function(map, ...) {
  n <- length(map)
  if (n == 0) {
    return(map)
  }

  # Validate map.  Only NAs and the range are checked; uniqueness of the
  # values is assumed, not verified.
  r <- range(map)
  if (any(is.na(r))) {
    stop("Argument 'map' is not a map. It contains NA values.")
  }
  if (r[1] != 1 || r[2] != n) {
    stop("Argument 'map' is not a map. Its range is not [1,", n, "]: ",
         "[", r[1], ", ", r[2], "]")
  }

  # Create the inverse map
  inverseMap <- vector("integer", n)
  idx <- seq_len(n)
  inverseMap[.subset(map, idx)] <- idx  # == inverseMap[map[idx]] <- idx;

  inverseMap
} # invertMap()

############################################################################
# HISTORY:
# 2006-03-30
# o Created.
############################################################################ affxparser/R/isCelFile.R0000644000175200017520000000446214516003651016127 0ustar00biocbuildbiocbuild########################################################################/**
# @RdocFunction isCelFile
#
# @title "Checks if a file is a CEL file or not"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#   \item{filename}{A filename.}
#   \item{...}{Not used.}
# }
#
# \value{
#   Returns @TRUE if a CEL file, otherwise @FALSE.
#   ASCII (v3), binary (v4;XDA), and binary (CCG v1;Calvin) CEL files
#   are recognized.
#   If file does not exist, an exception is thrown.
# }
#
# \seealso{
#   @see "readCel", @see "readCelHeader", @see "readCelUnits".
# }
#
# @author "HB"
#
# @keyword programming
# @keyword file
# @keyword IO
# @keyword internal
#**/#######################################################################
isCelFile <- function(filename, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  if (!file.exists(filename)) {
    stop("Cannot check file format. File not found: ", filename)
  }

  # Note, we cannot use readCelHeader(), because that will crash R if
  # it is not a CEL file.

  # Header readers to try, in order: binary v4 (XDA), ASCII v3 and
  # CCG v1 (Calvin).  Each returns a header or NULL, or throws an error.
  readers <- list(
    function(con) .readCelHeaderV4(con),
    function(con) .readCelHeaderV3(con),
    function(con) {
      header <- readCcgHeader(con)
      dataTypeId <- header$dataHeader$dataTypeId
      if (!identical(dataTypeId, "affymetrix-calvin-intensity")) {
        header <- NULL
      }
      header
    }
  )

  # Applies one reader to a fresh connection, which is always closed
  # afterwards, also when the reader throws an error.
  tryReader <- function(reader) {
    con <- file(filename, open="rb")
    on.exit(close(con))
    reader(con)
  }

  for (reader in readers) {
    # A failure to open or parse means "not this format"
    header <- tryCatch(tryReader(reader), error = function(ex) NULL)

    # Stop at the first recognized format, so that a later reader cannot
    # discard a header that was already accepted.
    if (!is.null(header)) {
      return(TRUE)
    }
  }

  FALSE
}

############################################################################
# HISTORY:
# 2007-08-16
# o Updated isCelFile() so it returns TRUE for CCG CEL v1 files.
# 2006-07-27
# o BUG FIX: The error message when the file was not found was broken.
# 2006-07-10
# o Created.
############################################################################ affxparser/R/parseDatHeaderString.R0000644000175200017520000001000714516003651020323 0ustar00biocbuildbiocbuild########################################################################/**
# @RdocFunction parseDatHeaderString
#
# @title "Parses a DAT header string"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#   \item{header}{A @character string.}
#   \item{timeFormat}{The format string used to parse the timestamp.
#     For more details, see \code{\link[base:strptime]{strptime}()}.
#     If @NULL, no parsing is done.}
#   \item{...}{Not used.}
# }
#
# \value{
#   Returns named @list structure.
# }
#
# \seealso{
#   @see "readCelHeader".
# }
#
# @author "HB"
#
# @keyword programming
# @keyword file
# @keyword IO
# @keyword internal
#**/#######################################################################
parseDatHeaderString <- function(header, timeFormat="%m/%d/%y %H:%M:%S", ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  trim <- function(s, ...) {
    s <- as.character(s)
    s <- sub("^[\t\n\f\r ]*", "", s)
    s <- sub("[\t\n\f\r ]*$", "", s)
    s
  } # trim();

  #
  # There are several sub-fields in this field. The first sub field is the
  # scanner ID, sometimes followed by a number, followed by three spaces.
  # If the scanner ID is absent, the field consists of four spaces.
  # Next are 10 structured comment fields. Each field is preceded by the
  # delimiter 0x14 and a space.  The field is followed by a space and 0x14.
  # Only field two is valid, the other 9 fields are obsolete. Field 2
  # contains the probe array type, followed by .1sq. The 1sq extension is
  # also obsolete.
  # Next (after the last structured field) there is the chip orientation
  # preceded by a space.
  # The rest of the field is filled with nulls (zeros)
  #

  # Split by ASCII 0x14 delimiter
  parts <- strsplit(header, split=" \024 ", fixed=TRUE)[[1]]
  field <- parts[1]
  commentFields <- parts[-1]

  res <- list()

  # Capture groups: 1=CLS, 2=RWS, 3=XIN, 4=YIN, 5=VE, 6=temperature,
  # 7=laser power.
  pattern <- ".*CLS=(.{5})RWS=(.{5})XIN=(.{3})YIN=(.{3})VE=(.{3}).*(.{7})(.{4})"

  # Number of pixels per row (padded with spaces), preceded with "CLS=".
  res$nbrOfPixelColumns <- as.integer(gsub(pattern, "\\1", field))

  # Number of rows in the image (padded with spaces), preceded with "RWS=".
  res$nbrOfPixelRows <- as.integer(gsub(pattern, "\\2", field))

  # Pixel width in micrometers (padded with spaces), preceded with "XIN="
  res$pixelWidth <- as.double(gsub(pattern, "\\3", field))

  # Pixel height in micrometers (padded with spaces), preceded with "YIN="
  res$pixelHeight <- as.double(gsub(pattern, "\\4", field))

  # Scan speed in millimeters per second (padded with spaces),
  # preceded with "VE=".
  res$scanSpeed <- as.double(gsub(pattern, "\\5", field))

  # Temperature in degrees Celsius (padded with spaces). If no temperature was
  # set then the entire field is empty.
  res$temperature <- as.double(gsub(pattern, "\\6", field))

  # Laser power in milliwatts or microwatts (padded with spaces).
  # This is the seventh capture group; the sixth is the temperature.
  res$laserPower <- as.double(gsub(pattern, "\\7", field))

  # Find the element with a date. It is part of the same string as the
  # one containing the chip type.  Get the chip type from the header.
  # Extract the date timestamp
  pattern <- ".*([01][0-9]/[0-3][0-9]/[0-9][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]).*"
  timestamp <- gsub(pattern, "\\1", header)
  timestamp <- trim(timestamp) # Unnecessary?

  # Parse the identified timestamp with strptime()?
  if (!is.null(timeFormat)) {
    timestamp <- strptime(timestamp, format=timeFormat, ...)
    # If no valid timestamp was found, return NA.
    if (length(as.character(timestamp)) == 0) {
      timestamp <- as.POSIXct(NA)
    }
  }
  res$timestamp <- timestamp

  res$chipType <- trim(gsub("[.]1sq", "", commentFields[2]))

  res
} # parseDatHeaderString()

############################################################################
# HISTORY:
# 2009-09-21
# o Created from internal code in aroma.affymetrix.
############################################################################ affxparser/R/private.assertMap.R0000644000175200017520000000331114516003651017670 0ustar00biocbuildbiocbuild#########################################################################/-Rdoc TURNED OFF-**
# @RdocFunction .assertMap
#
# @title "Validates a read or a write map"
#
# @synopsis
#
# \description{
#   @get "title".
# }
#
# \arguments{
#   \item{map}{An @integer @vector.}
#   \item{nbrOfCells}{The number of cells on the array.}
#   \item{...}{Not used.}
# }
#
# \value{
#   Returns (invisibly) the map as an @integer @vector, if it is a valid
#   map, otherwise an error is thrown.
# }
#
# @author "HB"
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*-Rdoc TURNED OFF-/#########################################################################
.assertMap <- function(map, nbrOfCells=length(map), ...) {
  n <- length(map)
  if (n != nbrOfCells) {
    stop("Argument 'map' is not a valid map. The number of elements does not match the number of cells on the array: ", n, " != ", nbrOfCells)
  }

  # Coerce to integers
  map <- as.integer(map)

  # Assert that there are no NAs
  r <- range(map)
  if (any(is.na(r))) {
    stop("Argument 'map' is not a valid map. It contains NA values.")
  }

  # Check range
  if (r[1] != 1 || r[2] != nbrOfCells) {
    stop("Argument 'map' is not a valid map. Its range is not [1,",
         nbrOfCells, "]: ", "[", r[1], ", ", r[2], "]")
  }

  # Check that the map is bijective
  #  map2 <- invertMap(invertMap(map));
  #  if (!identical(map, map2)) {
  #    stop("Argument 'map' is not a valid map. It is not bijective.");
  #  }

  invisible(map)
} # .assertMap()

############################################################################
# HISTORY:
# 2007-01-04
# o Created.
############################################################################ affxparser/R/private.readCelHeaderV3.R0000644000175200017520000000537214516003651020623 0ustar00biocbuildbiocbuild########################################################################/-Rdoc TURNED OFF-**
# @RdocFunction .readCelHeaderV3
#
# @title "Read the header of a CEL v3 (ASCII) file"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#   \item{con}{An open and readable @connection.}
# }
#
# \value{
#   Returns a named @list structure.
# }
#
# \details{
#   When the header is read, the file pointer is at the line after the
#   header.  Empty lines should be expected before the intensity section.
#   This function does not make use of Fusion SDK.
# }
#
# @author "HB"
#
# @keyword IO
# @keyword File
# @keyword internal
#**-Rdoc TURNED OFF-/#######################################################################
.readCelHeaderV3 <- function(con, ...) {
  trim <- function(s, ...) {
    s <- gsub("^[ \t]*", "", s)
    s <- gsub("[ \t]*$", "", s)
    s
  }

  # Reads lines until a non-blank one is found.  At end of file,
  # readLines() returns character(0), which also ends the scan.
  readNonBlankLine <- function() {
    repeat {
      line <- trim(readLines(con, n=1))
      if (!identical(line, "")) {
        return(line)
      }
    }
  }

  # Read "[CEL]"
  tag <- readNonBlankLine()
  if (!identical(tag, "[CEL]")) {
    stop("Could not read CEL v3 file header. File format error: File does not start with [CEL]: ", tag)
  }

  # Read version
  version <- trim(readLines(con, n=1))
  if (!identical(version, "Version=3")) {
    stop("Could not read CEL v3 file header. Not a version 3 file: ", version)
  }

  # Read "[HEADER]"
  tag <- readNonBlankLine()
  if (!identical(tag, "[HEADER]")) {
    stop("Could not read CEL v3 file header. Expected HEADER section, but got: ", tag)
  }

  # Read header fields, up to the first blank line or the end of file
  header <- list(
    version = as.integer(3)
  )
  repeat {
    field <- trim(readLines(con, n=1))
    if (length(field) == 0 || identical(field, "")) {
      break
    }
    # KEY=VALUE, where VALUE may itself contain '='
    field <- unlist(strsplit(field, split="="))
    key <- field[1]
    value <- paste(field[-1], collapse="=")
    header[[key]] <- value
  }

  # Fields to be converted to integers
  intFields <- c("Cols", "Rows", "TotalX", "TotalY", "OffsetX", "OffsetY",
                 "swapXY")
  # Ad hoc, but the "Axis" fields are sometimes misspelled.
  axisFields <- grep("^Axis[-]*(i|I)nvert(X|Y)$", names(header), value=TRUE)
  intFields <- c(intFields, axisFields)
  for (ff in intersect(names(header), intFields)) {
    header[[ff]] <- as.integer(header[[ff]])
  }

  # Vector fields
  for (ff in grep("GridCorner", names(header))) {
    value <- header[[ff]]
    value <- unlist(strsplit(value, split=" "))
    value <- trim(value)
    value <- as.integer(value)
    header[[ff]] <- value
  }

  header
} # .readCelHeaderV3()

############################################################################
# HISTORY:
# 2006-07-10
# o Created from .readCelHeaderV3().
############################################################################ affxparser/R/private.readCelHeaderV4.R0000644000175200017520000000531514516003651020621 0ustar00biocbuildbiocbuild########################################################################/-Rdoc TURNED OFF-**
# @RdocFunction .readCelHeaderV4
#
# @title "Read the header of a CEL v4 (binary) file"
#
# \description{
#  @get "title".
# }
#
# @synopsis
#
# \arguments{
#   \item{con}{An open and readable @connection.}
# }
#
# \value{
#   Returns a named @list structure.
# }
#
# \details{
#   When the header is read, the file pointer is at the beginning
#   of the data section.  See also @see "base::seek".
#   This is an internal function that is used mainly to skip the CEL header
#   to reach the data section.  It does not make use of Fusion SDK.
# }
#
# @author "HB"
#
# @keyword IO
# @keyword File
# @keyword internal
#**-Rdoc TURNED OFF-/#######################################################################
.readCelHeaderV4 <- function(con, ...) {
  # All values are stored in little-endian byte order
  readInteger <- function(con, ...) {
    readBin(con, what="integer", size=4, n=1, signed=TRUE, endian="little")
  }

  readDWord <- function(con, ...) {
    # NOTE: Ideally we would use signed=FALSE here, but there is no
    # integer data type in R that can hold 4-byte unsigned integers.
    # Because of this limitation, readBin() will give a warning that
    # signed=FALSE only works for size=1 or 2.
    # WORKAROUND: Use signed=TRUE and assume there are no values
    # greater that .Machine$integer.max == 2^31-1. /HB 2015-04-15
    readBin(con, what="integer", size=4, n=1, signed=TRUE, endian="little")
  }

  # A string is stored as a 4-byte length followed by that many characters
  readString <- function(con, ...) {
    len <- readInteger(con)  # Number of characters to read
    readChar(con, nchars=len)
  }

  # Read and validate the MAGIC.  identical() is used, because readBin()
  # returns a zero-length vector at end of file, for which '!=' would
  # give a zero-length condition.
  magic <- readInteger(con)
  if (!identical(magic, 64L)) {
    stop("Could not read CEL v4 file. File format error: MAGIC == ", magic)
  }

  # The arguments of list() are evaluated in order, i.e. the fields are
  # read in the order they are listed.
  list(
    magic = magic,
    version = readInteger(con),
    cols = readInteger(con),
    rows = readInteger(con),
    total = readInteger(con),
    hdr = readString(con),
    algorithm = readString(con),
    parameters = readString(con),
    cellmargin = readInteger(con),
    noutliers = readDWord(con),
    nmasked = readDWord(con),
    nsubgrids = readInteger(con)
  )
} # .readCelHeaderV4()

############################################################################
# HISTORY:
# 2011-11-01
# o CLEANUP: Changed a signed=FALSE to signed=TRUE for a readBin() call
#   reading 4-byte integers in .readCelHeaderV4().
# 2006-06-18
# o Created. Used by updateCel() to skip header to reach data section.
############################################################################ affxparser/R/private.unwrapCelHeaderV4.R0000644000175200017520000002716214516003651021226 0ustar00biocbuildbiocbuild
# Parses a string of "TAG:VALUE" pairs separated by semi-colons, or
# "TAG=VALUE" pairs separated by spaces, into a named list of character
# strings.  An empty string gives an empty list.  An error is thrown if
# the string cannot be parsed.
.unwrapTagValuePairs <- function(bfr, ...) {
  trim <- function(s) {
    s <- gsub("^ *", "", s)
    gsub(" *$", "", s)
  }

  bfr <- trim(bfr)
  # For each pattern: \\1 = tag, \\2 = value, \\3 = the rest of the buffer
  patterns <- c("^([^:]*):([^;]*)[;]*(.*)$", "^([^=]*)=([^ ]*)[ ]*(.*)$")
  tags <- values <- c()
  while (nchar(bfr) > 0) {
    # Use the first pattern that matches the buffer
    matched <- FALSE
    for (pattern in patterns) {
      tag <- gsub(pattern, "\\1", bfr)
      if (!identical(tag, bfr)) {
        matched <- TRUE
        break
      }
    }

    # Without a match the buffer would never shrink and this loop would
    # never end.
    if (!matched) {
      stop("Failed to parse tag-value pairs. Neither a TAG:VALUE nor a TAG=VALUE pair: ", bfr)
    }

    value <- gsub(pattern, "\\2", bfr)
    tags <- c(tags, tag)
    values <- c(values, value)
    bfr <- gsub(pattern, "\\3", bfr)
    bfr <- trim(bfr)
  }

  params <- as.list(values)
  names(params) <- tags
  params
}


# Parses the DAT header string of a CEL header into a list with elements
# pixelRange, sampleName, CLS, RWS, XIN, YIN, VE, scanTemp, laserPower,
# scanDate, scanner (id and type), misc and chipType.
.unwrapDatHeaderString <- function(header, ...) {
  trim <- function(s) {
    s <- gsub("^ *", "", s)
    gsub(" *$", "", s)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Already a list?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (!is.list(header)) {
    header <- strsplit(header, split="\n")[[1]]
  }

  # Extract the "head" and the "tail" of the DAT header, i.e. what is
  # before and what is from the first 0x14 delimiter.
  pattern <- "([^\024]*)(\024.*)"
  head <- gsub(pattern, "\\1", header)
  tail <- gsub(pattern, "\\2", header)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #  [123456789012345678900123456789001234567890]
  # "[5..65534] NA06985_H_tH_B5_3005533:",      ????
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  pattern <- "^([^:]*):(.*)$"
  if (regexpr(pattern, head) != -1) {
    bfr <- gsub(pattern, "\\1", header)
    header2 <- gsub(pattern, "\\2", header)
    bfr <- trim(bfr)
    # Example: "[12..40151] Fetal 3"
    if (nchar(bfr) > 0) {
      pattern <- "^([^ ]*])[ ]*(.*)[ ]*"
      pixelRange <- gsub(pattern, "\\1", bfr)
      sampleName <- gsub(pattern, "\\2", bfr)
      if (identical(pixelRange, sampleName)) {
        stop("Internal error: Failed to extract 'pixelRange' and 'sampleName' from DAT header. They became identical: ", pixelRange)
      }
    } else {
      pixelRange <- ""
      sampleName <- ""
    }

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Parse the DAT header
    #
    # 1. Number of pixels per row (padded with spaces), preceded with
    #    "CLS=". char[9]
    # 2. Number of rows in the image (padded with spaces), preceded with
    #    "RWS=".char[9]
    # 3. Pixel width in micrometers (padded with spaces), preceded with
    #    "XIN=" char[7]
    # 4. Pixel height in micrometers (padded with spaces), preceded with
    #    "YIN=". char[7]
    # 5. Scan speed in millimeters per second (padded with spaces), preceded
    #    with "VE=". char[6]
    # 6. Temperature in degrees Celsius (padded with spaces). If no temperature
    #    was set then the entire field is empty. char[7]
    # 7. Laser power in milliwatts or microwatts (padded with spaces). char[4]
    # 8. Date and time of scan (padded with spaces). char[18]
    #
    # Example:
    # [123456789012345678900123456789001234567890] (See above)
    # "CLS=8714 ",
    # "RWS=8714 ",
    # "XIN=1 ",
    # "YIN=1 ",
    # "VE=30 ",
    # " ",
    # "2.0 ",
    # "01/14/04 14:26:57 "
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Fixed-width fields; the ninth holds the remaining 220 characters
    len <- c(9,9,7,7,6,7,4,18,220)
    ends <- cumsum(len)
    starts <- ends - len + 1
    header <- substring(header2, starts, ends)
    header <- trim(header)

    # Store the last field
    bfr <- header[9]

    header <- list(
      pixelRange = pixelRange,
      sampleName = sampleName,
      CLS = gsub("^CLS=(.*)", "\\1", header[1]),
      RWS = gsub("^RWS=(.*)", "\\1", header[2]),
      XIN = gsub("^XIN=(.*)", "\\1", header[3]),
      YIN = gsub("^YIN=(.*)", "\\1", header[4]),
      VE = gsub("^VE=(.*)", "\\1", header[5]),
      scanTemp = header[6],
      laserPower = header[7],
      scanDate = header[8]
    )

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # The 'bfr' field:
    #
    # "There are several sub-fields in this field. The first sub field is the
    #  scanner ID, sometimes followed by a number, followed by three spaces.
    #  If the scanner ID is absent, the field consists of four spaces.
    #
    # Example:
    # [123456789012345678900123456789001234567890] (????)
    # "50101230 M10 \024 \024 Hind240.1sq \024 \024 \024 \024
    # \024 \024 \024 \024 \024 6"
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # 0x14 == 024
    pattern <- "^([^\024]*)[ ]*(\024.*)$"
    scannerInfo <- gsub(pattern, "\\1", bfr)
    scannerInfo <- trim(scannerInfo)

    # Not locale safe: pattern <- "^([a-zA-Z0-9]*)[ ]*([a-zA-Z0-9]*)[ ]*";
    pattern <- "^([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*([abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]*)[ ]*"
    header$scanner <- list(
      id = gsub(pattern, "\\1", scannerInfo),
      type = gsub(pattern, "\\2", scannerInfo)
    )
  } else {
    # The DAT header has no "<pixel range> <sample name>:" part; return
    # empty strings for all fields.
    # TO DO: Make these NAs to have the correct storage modes
    naValue <- ""
    header <- list(
      pixelRange = naValue,
      sampleName = naValue,
      CLS = naValue,
      RWS = naValue,
      XIN = naValue,
      YIN = naValue,
      VE = naValue,
      scanTemp = naValue,
      laserPower = naValue,
      scanDate = naValue,
      scanner = list(id=naValue, type=naValue)
    )
  }

  bfr <- tail

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #  Next are 10 structured comment fields. Each field is preceded by the
  #  delimiter 0x14 and a space.  The field is followed by a space and 0x14.
  #  Only field two is valid, the other 9 fields are obsolete. Field 2
  #  contains the probe array type, followed by .1sq. The 1sq extension is
  #  also obsolete.
  #
  #  Next (after the last structured field) there is the chip orientation
  #  preceded by a space.
  #
  #  The rest of the field is filled with nulls (zeros)". Size: char[220]
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Peel off one "\024 <value>" field per iteration, until the buffer no
  # longer shrinks.
  pattern <- "\024 ([^\024]*)(.*)"
  values <- c()
  lastNchar <- -Inf
  while (nchar(bfr) != lastNchar) {
    lastNchar <- nchar(bfr)
    value <- gsub(pattern, "\\1", bfr)
    value <- trim(value)
    bfr <- gsub(pattern, "\\2", bfr)
    values <- c(values, value)
  }

  header$misc <- values
  header$chipType <- gsub("[.]1sq$", "", values[2])

  header
} # .unwrapDatHeaderString()


# Parses a CEL v3 header, given as one string with one TAG=VALUE field per
# line, into a named list.  The DatHeader and AlgorithmParameters fields
# are parsed further.  An error is thrown if a mandatory field is missing.
.unwrapCelHeaderV3String <- function(header, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Already a list?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (!is.list(header)) {
    header <- strsplit(header, split="\n")[[1]]
    # keep <- (unlist(lapply(header, FUN=nchar)) > 0);
    # header <- header[keep];
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Parse the CEL v3 header
  #
  # "The data in each section is of the format TAG=VALUE."
  #
  # Cols - The number of columns in the array (of cells).
  # Rows - The number of rows in the array (of cells).
  # TotalX - Same as Cols.
  # TotalY - Same as Rows.
  # OffsetX - Not used, always 0.
  # OffsetY - Not used, always 0.
  # GridCornerUL - XY coordinates of the upper left grid corner in pixel
  #   coordinates.
  # GridCornerUR - XY coordinates of the upper right grid corner in pixel
  #   coordinates.
  # GridCornerLR - XY coordinates of the lower right grid corner in pixel
  #   coordinates.
  # GridCornerLL - XY coordinates of the lower left grid corner in pixel
  #   coordinates.
  # Axis-invertX - Not used, always 0.
  # AxisInvertY - Not used, always 0.
  # swapXY - Not used, always 0.
  # DatHeader - The header from the DAT file.
  # Algorithm - The algorithm name used to create the CEL file.
  # AlgorithmParameters - The parameters used by the algorithm. The format
  #   is TAG:VALUE pairs separated by semi-colons or TAG=VALUE pairs separated
  #   by spaces.
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  pattern <- "^([-a-zA-Z0-9]*)=(.*)$"
  fieldNames <- gsub(pattern, "\\1", header)
  values <- gsub(pattern, "\\2", header)
  names(values) <- fieldNames
  header <- as.list(values)

  # Fix some mishaps in names (sic!; see DevNet forum this week) /HB 2006-09-10
  # names(header) <- gsub("^Axis-invert", "Axis-Invert", names(header));
  # names(header) <- gsub("^AxisInvert", "Axis-Invert", names(header));

  # Assert that all mandatory fields are there
  knownFields <- c("Cols", "Rows", "TotalX", "TotalY", "OffsetX", "OffsetY",
                   "GridCornerUL", "GridCornerUR", "GridCornerLR",
                   "GridCornerLL", "Axis-invertX", "AxisInvertY", "swapXY",
                   "DatHeader", "Algorithm", "AlgorithmParameters")
  isMissing <- !(knownFields %in% names(header))
  if (any(isMissing)) {
    stop("Argument 'header' does not contain all mandatory fields: ",
         paste(knownFields[isMissing], collapse=", "))
  }

  # Unwrap DAT header string
  header$DatHeader <- .unwrapDatHeaderString(header$DatHeader)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Unwrap 'AlgorithmParameters':
  #
  # AlgorithmParameters - The parameters used by the algorithm. The format
  #   is TAG:VALUE pairs separated by semi-colons or TAG=VALUE pairs separated
  #   by spaces.
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  header$AlgorithmParameters <- .unwrapTagValuePairs(header$AlgorithmParameters)

  header
} # .unwrapCelHeaderV3String()


# Unwraps the 'parameters' and 'header' elements of a CEL header list.
#
# \arguments{
#   \item{header}{A @list structure as returned by @see "readCelHeader".}
# }
.unwrapCelHeaderV4 <- function(header, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'header':
  if (!is.list(header)) {
    stop("Argument 'header' is not a list: ", mode(header))
  }

  # Assert that all header fields are there
  knownFields <- c("version", "cols", "rows", "total", "algorithm",
                   "parameters", "chiptype", "header", "cellmargin",
                   "noutliers", "nmasked")
  isMissing <- !(knownFields %in% names(header))
  if (any(isMissing)) {
    stop("Argument 'header' does not contain all mandatory fields: ",
         paste(knownFields[isMissing], collapse=", "))
  }

  header$parameters <- .unwrapTagValuePairs(header$parameters)
  header$header <- .unwrapCelHeaderV3String(header$header)

  header
} # .unwrapCelHeaderV4()

############################################################################
# HISTORY:
# 2011-02-22
# o ROBUSTNESS/BUG FIX: The internal .unwrapDatHeaderString() would
#   throw "Internal error: Failed to extract 'pixelRange' and 'sampleName'
#   from DAT header.  They became identical: ..." in case the DAT header
#   of the CEL file did not contain all fields.  The function has now
#   been updated to be more forgiving and robust so that missing values
#   are returned for such fields instead.
# 2007-08-16
# o BUG FIX: Internal .unwrapDatHeaderString() failed to correctly extract
#   'pixelRange' and 'sampleName' from DAT header.
# 2006-12-28
# o R CMD check v2.5.0 devel complained about: Warning: '\]' is an
#   unrecognized escape in a character string. Warning: unrecognized escape
#   removed from "^([^\]]*])[ ]*(.*)[ ]*". Replaced with '\\]'.
# 2006-09-10
# o BUG FIX: Local trim() was missing in one of the private functions.
# 2006-09-06
# o Created. This is used by writeCelHeaderV4().
############################################################################
# affxparser/R/private.wrapCelHeaderV4.R0000644000175200017520000001220214516003651020650 0ustar00biocbuildbiocbuild

# Collapses a named list (or vector) of parameters into one string of
# "name=value" pairs separated by semicolons, e.g. "a=1;b=x".
#
# Arguments:
#   args - A named list or vector.  It is flattened with unlist().
#   ...  - Not used.
#
# Returns a single character string ("" if 'args' is empty).
.wrapTagValuePairs <- function(args, ...) {
  params <- unlist(args);
  pairs <- sprintf("%s=%s", names(params), params);
  paste(pairs, collapse=";");
} # .wrapTagValuePairs()


# Builds the DAT header string from its list representation.
#
# Arguments:
#   header - A list with the elements 'pixelRange', 'sampleName', 'CLS',
#            'RWS', 'XIN', 'YIN', 'VE', 'scanTemp', 'laserPower', 'scanDate',
#            'scanner' (a list with 'id' and 'type'), 'misc' and 'chipType'.
#   ...    - Not used.
#
# Returns a single character string.
#
# Throws an error if the fixed-width part of the header does not become
# exactly 67 characters, or if 'chipType' is missing.
.wrapDatHeader <- function(header, ...) {
  bfr <- c();

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # [123456789012345678900123456789001234567890]
  # "[5..65534] NA06985_H_tH_B5_3005533:", ????
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  fmtstr <- "%s %s:";
  value <- sprintf(fmtstr, header$pixelRange, header$sampleName);
  bfr <- c(bfr, value);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Wrap the DAT header
  #
  # 1. Number of pixels per row (padded with spaces), preceded with
  #    "CLS=". char[9]
  # 2. Number of rows in the image (padded with spaces), preceded with
  #    "RWS=".char[9]
  # 3. Pixel width in micrometers (padded with spaces), preceded with
  #    "XIN=" char[7]
  # 4. Pixel height in micrometers (padded with spaces), preceded with
  #    "YIN=". char[7]
  # 5. Scan speed in millimeters per second (padded with spaces), preceded
  #    with "VE=". char[6]
  # 6. Temperature in degrees Celsius (padded with spaces). If no temperature
  #    was set then the entire field is empty. char[7]
  # 7. Laser power in milliwatts or microwatts (padded with spaces). char[4]
  # 8. Date and time of scan (padded with spaces). char[18]
  #
  # Example:
  # [123456789012345678900123456789001234567890] (See above)
  # "CLS=8714 ",
  # "RWS=8714 ",
  # "XIN=1 ",
  # "YIN=1 ",
  # "VE=30 ",
  # " ",
  # "2.0 ",
  # "01/14/04 14:26:57 "
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  fmtstr <- "CLS=%-5.5sRWS=%-5.5sXIN=%-3.3sYIN=%-3.3sVE=%-3.3s%-7.7s%-4.4s%-18.18s";
  value <- sprintf(fmtstr, header$CLS, header$RWS, header$XIN, header$YIN,
                   header$VE, header$scanTemp, header$laserPower,
                   header$scanDate);

  # Assert correct length (9+9+7+7+6+7+4+18=67)
  if (nchar(value) != 67)
    stop("Internal error in .wrapDatHeader(). Incorrect string length (", nchar(value), " != 67): ", value);

  bfr <- c(bfr, value);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # [123456789012345678900123456789001234567890] (????)
  # " "
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (nchar(header$scanner$id) == 0) {
    value <- " ";
  } else {
    fmtstr <- "%s %s ";
    value <- sprintf(fmtstr, header$scanner$id, header$scanner$type);
  }
  bfr <- c(bfr, value);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # [123456789012345678900123456789001234567890] (????)
  # "\024 \024 \024 \024 \024 \024 \024 \024 \024 \024 \024 "
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Make sure 'misc' is of length 10: pad with empty strings if it is
  # shorter and truncate it if it is longer.  The number of pads is clamped
  # at zero, because rep() fails on a negative count.
  nbrOfPads <- max(0L, 10L - length(header$misc));
  header$misc <- c(header$misc, rep("", nbrOfPads));
  header$misc <- header$misc[1:10];

  # IMPORTANT: Overwrite 'chip type' value
  if (is.null(header$chipType))
    stop("DAT header has not 'chipType' field.");
  header$misc[2] <- sprintf("%s.1sq", header$chipType);

  fmtstr <- "\024 %s ";
  values <- sprintf(fmtstr, header$misc);
  values <- paste(values, collapse="");
  # values <- paste(values, "\024 6", sep="");
  bfr <- c(bfr, values);

  bfr <- paste(bfr, collapse="");

  bfr;
} # .wrapDatHeader()


# Builds the CEL v3 header string from its list representation.
#
# The derived and fixed fields (TotalX, TotalY, OffsetX, OffsetY,
# Axis-invertX, AxisInvertY and swapXY) are overwritten, the 'DatHeader' and
# 'AlgorithmParameters' elements are wrapped into strings, and all fields
# are then written as "name=value" lines, each terminated by a newline.
#
# Arguments:
#   header - A list holding the CEL v3 header fields; 'Cols', 'Rows',
#            'DatHeader' and 'AlgorithmParameters' are read here.
#   ...    - Not used.
#
# Returns a single character string.
.wrapCelHeaderV3 <- function(header, ...) {
  # Make sure the header is consistent
  header$TotalX <- header$Cols;
  header$TotalY <- header$Rows;
  header$OffsetX <- 0;
  header$OffsetY <- 0;
  header$"Axis-invertX" <- 0;
  header$"AxisInvertY" <- 0;
  header$swapXY <- 0;

  # Wrap up the DAT header
  header$DatHeader <- .wrapDatHeader(header$DatHeader);

  # Wrap up the 'AlgorithmParameters' header
  header$AlgorithmParameters <- .wrapTagValuePairs(header$AlgorithmParameters);

  # Wrap up everything else
  fmtstr <- "%s=%s";
  header <- unlist(header);
  header <- sprintf(fmtstr, names(header), header);
  header <- paste(header, collapse="\n");
  header <- paste(header, "\n", sep="");

  header;
} # .wrapCelHeaderV3()


# Wraps up a CEL v4 header list so that its 'header' (the embedded CEL v3
# header) and 'parameters' elements become strings.
#
# Arguments:
#   header - A list with (at least) the elements 'cols', 'rows',
#            'algorithm', 'parameters' and 'header'.
#   ...    - Not used.
#
# Returns the updated list, with 'version' set to integer 4 and 'total' set
# to cols*rows.
.wrapCelHeaderV4 <- function(header, ...) {
  # Make sure the fields are consistent
  header$version <- as.integer(4);
  header$total <- header$cols * header$rows;

  # Make sure the CEL V3 header is consistent
  headerV3 <- header$header;
  headerV3$Cols <- header$cols;
  headerV3$Rows <- header$rows;

  # Override any algorithm and parameters in V3 header
  headerV3$Algorithm <- header$algorithm;
  headerV3$AlgorithmParameters <- header$parameters;

  headerV3 <- .wrapCelHeaderV3(headerV3);
  header$header <- headerV3;

  # Not needed anymore, wrap them up
  header$parameters <- .wrapTagValuePairs(header$parameters);

  header;
} # .wrapCelHeaderV4()


############################################################################
# HISTORY:
# 2007-08-16
# o Now internal .wrapCelHeaderV4() sets the version number as an integer.
# 2006-09-06
# o Created. This is used by writeCelHeaderV4().
############################################################################
# affxparser/R/readBpmap.R0000644000175200017520000000176614516003651016167 0ustar00biocbuildbiocbuild

# Reads a BPMAP file by calling the native routine "R_affx_get_bpmap_file".
#
# Arguments:
#   filename   - The pathname of the BPMAP file; passed as is to the native
#                code.
#   seqIndices - Passed on as an integer vector (NULL becomes integer(0)).
#   readProbeSeq, readSeqInfo, readPMXY, readMMXY, readStartPos,
#   readCenterPos, readStrand, readMatchScore, readProbeLength
#              - Flags passed on to the native code (presumably selecting
#                which fields to return; verify against the C sources).
#                Note that the native routine takes them in a different
#                order than this function does.
#   verbose    - Verbose level; passed on as an integer.
#
# Returns what the native routine returns.
#
# Throws an error if the native routine returns NULL.
readBpmap <- function(filename, seqIndices = NULL, readProbeSeq = TRUE, readSeqInfo = TRUE, readPMXY = TRUE, readMMXY = TRUE, readStartPos = TRUE, readCenterPos = FALSE, readStrand = TRUE, readMatchScore = FALSE, readProbeLength = FALSE, verbose = 0) {
  res <- .Call("R_affx_get_bpmap_file",
               filename, as.integer(seqIndices),
               readSeqInfo, readStartPos, readCenterPos,
               readProbeSeq, readStrand, readPMXY, readMMXY,
               readMatchScore, readProbeLength,
               as.integer(verbose),
               PACKAGE = "affxparser");

  # Sanity check
  if (is.null(res)) {
    stop("Failed to read BPMAP file: ", filename);
  }

  res;
} # readBpmap()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
############################################################################
# affxparser/R/readBpmapHeader.R0000644000175200017520000000074114516003651017270 0ustar00biocbuildbiocbuild

# Reads the header of a BPMAP file by calling the native routine
# "R_affx_get_bpmap_header".
#
# Arguments:
#   filename - The pathname of the BPMAP file; passed as is to the native
#              code.
#
# Returns what the native routine returns.
#
# Throws an error if the native routine returns NULL.
readBpmapHeader <- function(filename) {
  # Restrict the symbol lookup to this package's shared library, exactly as
  # readBpmap() and readBpmapSeqinfo() do.
  res <- .Call("R_affx_get_bpmap_header", filename, PACKAGE = "affxparser");

  # Sanity check
  if (is.null(res)) {
    stop("Failed to read BPMAP file header: ", filename);
  }

  res;
} # readBpmapHeader()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
############################################################################
# affxparser/R/readBpmapSeqInfo.R0000644000175200017520000000113014516003651017435 0ustar00biocbuildbiocbuild

# Reads the sequence information of a BPMAP file by calling the native
# routine "R_affx_get_bpmap_seqinfo".
#
# Arguments:
#   filename   - The pathname of the BPMAP file; passed as is to the native
#                code.
#   seqIndices - Passed on as an integer vector (NULL becomes integer(0)).
#   verbose    - Verbose level; passed on as an integer.
#
# Returns what the native routine returns.
#
# Throws an error if the native routine returns NULL.
readBpmapSeqinfo <- function(filename, seqIndices=NULL, verbose=0) {
  seqIdxs <- as.integer(seqIndices)
  verboseLevel <- as.integer(verbose)

  seqInfo <- .Call("R_affx_get_bpmap_seqinfo", filename, seqIdxs,
                   verboseLevel, PACKAGE = "affxparser")

  # Sanity check: the native code signals failure by returning NULL
  if (!is.null(seqInfo)) {
    return(seqInfo)
  }

  stop("Failed to read sequence information from BPMAP file: ", filename)
} # readBpmapSeqinfo()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
############################################################################
# affxparser/R/readCcg.R0000644000175200017520000005026514516003651015622 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCcg
#
# @title "Reads an Affymetrix Command Console Generic (CCG) Data file"
#
# @synopsis
#
# \description{
#   @get "title". The CCG data file format is also known as the
#   Calvin file format.
# }
#
# \arguments{
#   \item{pathname}{The pathname of the CCG file.}
#   \item{verbose}{An @integer specifying the verbose level. If 0, the
#     file is parsed quietly. The higher numbers, the more details.}
#   \item{.filter}{A @list.}
#   \item{...}{Not used.}
# }
#
# \value{
#   A named @list structure consisting of ...
# }
#
# \details{
#   Note, the current implementation of this method does not utilize the
#   Affymetrix Fusion SDK library. Instead, it is implemented in R from the
#   file format definition [1].
# }
#
# \section{About the CCG file format}{
#  A CCG file, consists of a "file header", a "generic data header",
#  and "data" section, as outlined here:
#  \itemize{
#   \item File Header
#   \item Generic Data Header (for the file)
#    \enumerate{
#     \item Generic Data Header (for the files 1st parent)
#      \enumerate{
#       \item Generic Data Header (for the files 1st parents 1st parent)
#       \item Generic Data Header (for the files 1st parents 2nd parent)
#       \item ...
#       \item Generic Data Header (for the files 1st parents Mth parent)
#      }
#     \item Generic Data Header (for the files 2nd parent)
#     \item ...
#     \item Generic Data Header (for the files Nth parent)
#    }
#   \item Data
#    \enumerate{
#     \item Data Group #1
#      \enumerate{
#       \item Data Set #1
#        \itemize{
#         \item Parameters
#         \item Column definitions
#         \item Matrix of data
#        }
#       \item Data Set #2
#       \item ...
#       \item Data Set #L
#      }
#     \item Data Group #2
#     \item ...
#     \item Data Group #K
#    }
#  }
# }
#
# @author "HB"
#
# \seealso{
#   @see "readCcgHeader".
#   @see "readCdfUnits".
# }
#
# \references{
#  [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats,
#      April, 2006.
#      \url{http://www.affymetrix.com/support/developer/}\cr
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
readCcg <- function(pathname, verbose=0, .filter=NULL, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument '.filter':
  hasFilter <- FALSE;
  if (!is.null(.filter)) {
    if (!is.list(.filter)) {
      stop("Argument '.filter' must be a list: ", mode(.filter));
    }
    hasFilter <- TRUE;
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Open file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  con <- file(pathname, open="rb");
  on.exit(close(con));

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Allocate return structure
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  ccg <- list();

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read file header
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # The file header is always read, because it is needed to locate the data
  # groups; the filter only decides whether it is returned.
  fhdr <- .readCcgFileHeader(con);
  if (hasFilter) {
    if (!identical(.filter$header, FALSE))
      ccg$fileHeader <- fhdr;
  } else {
    ccg$fileHeader <- fhdr;
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the data header
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  ccg$genericDataHeader <- .readCcgDataHeader(con, .filter=.filter$dataHeader);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the data
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  dataGroups <- .readCcgDataGroups(con, .filter=.filter$data, .fileHeader=fhdr);
  if (hasFilter) {
    if (!identical(.filter$dataGroups, FALSE))
      ccg$dataGroups <- dataGroups;
  } else {
    ccg$dataGroups <- dataGroups;
  }

  ccg;
} # readCcg()


# Reads all data groups, including their data sets, of a CCG file.
#
# Arguments:
#   pathname    - Either an open connection or the pathname of a CCG file.
#                 A pathname is opened here and closed again on exit.
#   .filter     - Currently not applied (see the disabled code below).
#   .fileHeader - The file header as returned by .readCcgFileHeader().
#                 If NULL, it is read from the connection.
#   ...         - Not used.
#
# Returns a list with one element per data group, named by the data group
# names.  Each element is a list with the elements 'header' and 'dataSets'.
.readCcgDataGroups <- function(pathname, .filter=NULL, .fileHeader=NULL, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'pathname':
  if (inherits(pathname, "connection")) {
    con <- pathname;
  } else {
    if (!file.exists(pathname))
      stop("File not found: ", pathname);
    con <- file(pathname, open="rb");
    on.exit(close(con));
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read file header?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (is.null(.fileHeader)) {
    .fileHeader <- .readCcgFileHeader(con);
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read data groups
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  nextDataGroupStart <- .fileHeader$dataGroupStart;
  nbrOfDataGroups <- .fileHeader$nbrOfDataGroups;

  # Preallocate the result instead of growing it in the loop
  dataGroups <- vector("list", nbrOfDataGroups);
  for (gg in seq_len(nbrOfDataGroups)) {
    dataGroupHeader <- .readCcgDataGroupHeader(con, fileOffset=nextDataGroupStart);
    # Next data group
    nextDataGroupStart <- dataGroupHeader$nextGroupStart;

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Apply filter
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # if (!is.null(.filter)) {
    #   currFilter <- NULL;
    #   if (is.null(names(.filter))) {
    #     currFilter <- .filter[[gg]];
    #   } else {
    #     pos <- match(dataGroupHeader$name, names(.filter));
    #     if (length(pos) > 0)
    #       currFilter <- .filter[[pos]];
    #   }
    # }
    # str(currFilter);

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Read data sets
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    offset <- dataGroupHeader$dataSetStart;
    dss <- vector("list", dataGroupHeader$nbrOfDataSets);
    names <- character(dataGroupHeader$nbrOfDataSets);
    for (kk in seq_along(dss)) {
      ds <- .readCcgDataSet(con, fileOffset=offset);
      offset <- ds$nextDataSetStart;
      dss[[kk]] <- ds;
      names[kk] <- ds$name;
    }
    names(dss) <- names;

    dataGroups[[gg]] <- list(
      header = dataGroupHeader,
      dataSets = dss
    );
  } # for (gg ...)

  names(dataGroups) <- unlist(lapply(dataGroups, FUN=function(dg) {
    dg$header$name
  }), use.names=FALSE);

  dataGroups;
} # .readCcgDataGroups()


# Reads the header of one data group from an open CCG connection.
#
# Arguments:
#   con        - An open, seekable binary connection.
#   fileOffset - The file position (counted from the start of the file) of
#                the data group header.  If NULL, reading starts at the
#                current position.
#   ...        - Not used.
#
# Returns a list with the elements 'nextGroupStart', 'dataSetStart',
# 'nbrOfDataSets' and 'name'.
.readCcgDataGroupHeader <- function(con, fileOffset=NULL, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  rawToString <- function(raw, ...) {
    # This approach drops all '\0', in order to avoid warnings
    # in rawToChar().  Note, it does not truncate the string after
    # the first '\0'.  However, such strings should never occur in
    # the first place.
    raw <- raw[raw != as.raw(0)];
    rawToChar(raw);
  }

  readInt <- function(con, n=1, ...) {
    readBin(con, what=integer(), size=4, signed=TRUE, endian="big", n=n);
  }

  readUInt <- function(con, n=1, ...) {
    # NOTE: Ideally we would use signed=FALSE here, but there is no
    # integer data type in R that can hold 4-byte unsigned integers.
    # Because of this limitation, readBin() will give a warning that
    # signed=FALSE only works for size=1 or 2.
    # WORKAROUND: Use signed=TRUE and assume there are no values
    # greater that .Machine$integer.max == 2^31-1. /HB 2015-04-15
    readBin(con, what=integer(), size=4, signed=TRUE, endian="big", n=n);
  }

  readWString <- function(con, ...) {
    nchars <- readInt(con);
    if (nchars == 0)
      return("");
    bfr <- readBin(con, what=raw(), n=2*nchars);
    # Keep only the second byte of each two-byte character
    bfr <- bfr[seq(from=2, to=length(bfr), by=2)];
    rawToString(bfr);
  }

  readRaw <- function(con, ...) {
    n <- readInt(con);
    if (n == 0)
      return(raw(0));
    readBin(con, what=raw(0), n=n);
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Move to the requested file position
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (!is.null(fileOffset)) {
    # seek() names its reference-point argument 'origin', not 'offset'
    seek(con=con, where=fileOffset, origin="start", rw="read");
  }

  # Data Group
  # This section describes the data group. A data group is a group
  # of data sets. The file supports one or more data groups in a file.
  #
  # Item Description Type
  # 1 File position of the next data group. When this is the last
  #   data group in the file, the value should be 0. UINT
  # 2 File position of the first data set within the data group. UINT
  # 3 The number of data sets within the data group. INT
  # 4 The data group name. WSTRING
  nextGroupStart <- readUInt(con);
  dataSetStart <- readUInt(con);
  nbrOfDataSets <- readInt(con);
  name <- readWString(con);

  dataGroupHeader <- list(
    nextGroupStart=nextGroupStart,
    dataSetStart=dataSetStart,
    nbrOfDataSets=nbrOfDataSets,
    name=name
  );

  dataGroupHeader;
} # .readCcgDataGroupHeader()


# Reads one data set (header, parameters, column definitions and the data
# table) from an open CCG connection.
#
# Arguments:
#   con        - An open, seekable binary connection.
#   fileOffset - The file position (counted from the start of the file) of
#                the data set.  If NULL, reading starts at the current
#                position.
#   ...        - Not used.
#
# Returns a list with the elements 'elementsStart', 'nextDataSetStart',
# 'name', 'parameters' (a named list; each value carries its mime type in
# attribute 'mimeType') and 'table' (a data frame with one column per data
# set column).
.readCcgDataSet <- function(con, fileOffset=NULL, ...) {
  # Value Types
  # The following table defines the numeric values for the value types.
  # The value type is used to representing the type of value stored in
  # the file.
  #
  # Value Type
  # 0 BYTE
  # 1 UBYTE
  # 2 SHORT
  # 3 USHORT
  # 4 INT
  # 5 UINT
  # 6 FLOAT
  # 7 STRING
  # 8 WSTRING
  whats <- c("integer", "integer", "integer", "integer", "integer", "integer", "double", "character", "character");
  names(whats) <- c("BYTE", "UBYTE", "SHORT", "USHORT", "INT", "UINT", "FLOAT", "STRING", "WSTRING");
  signeds <- c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE);
  sizes <- c(1, 1, 2, 2, 4, 4, 4, 1, 2);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  rawToString <- function(raw, ...) {
    # This approach drops all '\0', in order to avoid warnings
    # in rawToChar().  Note, it does not truncate the string after
    # the first '\0'.  However, such strings should never occur in
    # the first place.
    raw <- raw[raw != as.raw(0)];
    rawToChar(raw);
  }

  readByte <- function(con, n=1, ...) {
    readBin(con, what=integer(), size=1, signed=TRUE, endian="big", n=n);
  }

  readInt <- function(con, n=1, ...) {
    readBin(con, what=integer(), size=4, signed=TRUE, endian="big", n=n);
  }

  readUInt <- function(con, n=1, ...) {
    # NOTE: Ideally we would use signed=FALSE here, but there is no
    # integer data type in R that can hold 4-byte unsigned integers.
    # Because of this limitation, readBin() will give a warning that
    # signed=FALSE only works for size=1 or 2.
    # WORKAROUND: Use signed=TRUE and assume there are no values
    # greater that .Machine$integer.max == 2^31-1. /HB 2015-04-15
    readBin(con, what=integer(), size=4, signed=TRUE, endian="big", n=n);
  }

  readString <- function(con, ...) {
    nchars <- readInt(con);
    if (nchars == 0)
      return("");
    readChar(con, nchars=nchars);
  }

  readWString <- function(con, ...) {
    nchars <- readInt(con);
    if (nchars == 0)
      return("");
    bfr <- readBin(con, what=raw(), n=2*nchars);
    # Keep only the second byte of each two-byte character
    bfr <- bfr[seq(from=2, to=length(bfr), by=2)];
    rawToString(bfr);
  }

  readRaw <- function(con, ...) {
    n <- readInt(con);
    if (n == 0)
      return(raw(0));
    readBin(con, what=raw(0), n=n);
  }

  # Reads one name/value/type parameter and decodes the value according to
  # its mime type.  Values of unknown types are returned as raw bytes.
  readWVT <- function(con, ...) {
    name <- readWString(con);
    raw <- readRaw(con);
    type <- readWString(con);

    # Update data types
    # * text/x-calvin-integer-8
    # * text/x-calvin-unsigned-integer-8
    # * text/x-calvin-integer-16
    # * text/x-calvin-unsigned-integer-16
    # * text/x-calvin-integer-32
    # * text/x-calvin-unsigned-integer-32
    # * text/x-calvin-float
    # * text/plain

    # 'n' is the number of bytes, which is an upper bound for the number of
    # values; readBin() returns only as many values as the bytes hold.
    n <- length(raw);

    # cat(sprintf("Reading n=%d records of type '%s' named '%s'.\n", n, type, name));

    value <- switch(type,
      "text/ascii" = {
        rawToString(raw);
      },

      "text/plain" = {
        # Unicode/UTF-16?!?
        raw <- matrix(raw, ncol=2, byrow=TRUE);
        raw <- raw[,2];
        rawToString(raw);
      },

      "text/x-calvin-integer-8" = {
        readBin(raw, what=integer(0), endian="big", size=1, signed=TRUE, n=n);
      },

      "text/x-calvin-unsigned-integer-8" = {
        readBin(raw, what=integer(0), endian="big", size=1, signed=FALSE, n=n);
      },

      "text/x-calvin-integer-16" = {
        readBin(raw, what=integer(0), endian="big", size=2, signed=TRUE, n=n);
      },

      "text/x-calvin-unsigned-integer-16" = {
        readBin(raw, what=integer(0), endian="big", size=2, signed=FALSE, n=n);
      },

      "text/x-calvin-integer-32" = {
        readBin(raw, what=integer(0), endian="big", size=4, signed=TRUE, n=n);
      },

      "text/x-calvin-unsigned-integer-32" = {
        # NOTE: Ideally we would use signed=FALSE here, but there is no
        # integer data type in R that can hold 4-byte unsigned integers.
        # Because of this limitation, readBin() will give a warning that
        # signed=FALSE only works for size=1 or 2.
        # WORKAROUND: Use signed=TRUE and assume there are no values
        # greater that .Machine$integer.max == 2^31-1. /HB 2015-04-15
        readBin(raw, what=integer(0), endian="big", size=4, signed=TRUE, n=n);
      },

      "text/x-calvin-float" = {
        readBin(raw, what=double(0), endian="big", size=4, n=n);
      },

      {
        raw;
      }
    ) # switch()

    list(name=name, value=value, raw=raw, type=type);
  } # readWVT()

  # Reads one column definition: name, value type and size in bytes
  readWBI <- function(con, ...) {
    list(name=readWString(con), type=readByte(con), size=readInt(con));
  }

  if (!is.null(fileOffset)) {
    # seek() names its reference-point argument 'origin', not 'offset'
    seek(con=con, where=fileOffset, origin="start", rw="read");
  }

  # Data Set
  # This section describes the data for a single data set item
  # (probe set, sequence, allele, etc.). The file supports one
  # or more data sets within a data group.
  #
  # Item Description Type
  # 1 The file position of the first data element in the data set.
  #   This is the first byte after the data set header. UINT
  # 2 The file position of the next data set within the data group.
  #   When this is the last data set in the data group the value
  #   shall be 1 byte past the end of the data set. This way the size
  #   of the data set may be determined. UINT
  # 3 The data set name. WSTRING
  # 4 The number of name/value/type parameters. INT
  # 5 Array of name/value/type parameters. (WSTRING / VALUE / TYPE) [ ]
  # 6 Number of columns in the data set.
  #   Example: For expression arrays, columns may include signal, p-value,
  #   detection call and for genotyping arrays columns may include allele
  #   call, and confidence value. For universal arrays, columns may
  #   include probe set intensities and background. UINT
  # 7 An array of column names, column value types and column type sizes
  #   (one per column).
  #   The value type shall be represented by the value from the value type
  #   table. The size shall be the size of the type in bytes. For strings,
  #   this value shall be the size of the string in bytes plus 4 bytes for
  #   the string length written before the string in the file.
  #   (WSTRING / BYTE / INT) [ ]
  # 8 The number of rows in the data set. UINT
  # 9 The data set table, consisting of rows of columns (data values).
  #   The specific type and size of each column is described by the data
  #   and size types above. ROW [ ]
  dataSet <- list(
    elementsStart=readUInt(con),
    nextDataSetStart=readUInt(con),
    name=readWString(con)
  );

  # Reading parameters
  nbrOfParams <- readInt(con);
  params <- vector("list", nbrOfParams);
  names <- character(nbrOfParams);
  for (kk in seq_len(nbrOfParams)) {
    wvt <- readWVT(con);
    names[kk] <- wvt$name;
    value <- wvt$value;
    attr(value, "mimeType") <- wvt$type;
    params[[kk]] <- value;
  }
  names(params) <- names;
  dataSet$parameters <- params;

  # Reading columns
  nbrOfColumns <- readUInt(con);
  names <- character(nbrOfColumns);
  colWhats <- vector("list", nbrOfColumns);
  bytesPerRow <- 0;
  for (cc in seq_len(nbrOfColumns)) {
    wbi <- readWBI(con);
    names[cc] <- wbi$name;
    # The value types are zero based
    what <- whats[wbi$type+1];
    signed <- signeds[wbi$type+1];
    size <- wbi$size;
    bytesPerRow <- bytesPerRow + size;
    attr(what, "name") <- names(whats)[wbi$type+1];
    attr(what, "signed") <- signed;
    attr(what, "size") <- size;
    colWhats[[cc]] <- what;
  }
  names(colWhats) <- names;
  bytesPerRow <- as.integer(bytesPerRow);

  nbrOfRows <- readUInt(con);
  totalNbrOfBytes <- nbrOfRows * bytesPerRow;

  # Skip to the first element.  The position has to be passed as 'where';
  # with an unknown argument name seek() leaves 'where' at NA and only
  # reports the current position without moving.
  seek(con, where=dataSet$elementsStart, origin="start", rw="read");

  # Read all data row by row
  raw <- readBin(con, what=raw(), n=totalNbrOfBytes);
  # One column per data row; the bytes of a data column are rows of 'raw'
  dim(raw) <- c(bytesPerRow, nbrOfRows);

  table <- vector("list", nbrOfColumns);
  for (cc in seq_len(nbrOfColumns)) {
    what <- colWhats[[cc]];
    signed <- attr(what, "signed");
    size <- attr(what, "size");

    if (what == "character") {
      # The first four bytes of each string hold its length
      value <- matrix(raw[1:4,], nrow=nbrOfRows, ncol=4);
      raw <- raw[-c(1:4),,drop=FALSE];

      # Get the number of characters per string (all equal)
      ## nchars <- readInt(con=value, n=nbrOfRows);
      ## nchars <- nchars[1];
      nchars <- readInt(con=value, n=1);
      value <- NULL; # Not needed anymore

      # NOTE(review): assumes size > 4 and that the remaining size-4 bytes
      # are exactly 'nchars' one-byte characters - confirm for WSTRING and
      # for empty strings.
      ccs <- 1:(size-4);
      value <- raw[ccs,];
      raw <- raw[-ccs,,drop=FALSE];
      value <- rawToChar(value, multiple=TRUE);
      dim(value) <- c(nchars, nbrOfRows);

      # Build strings using vectorization (not apply()!)
      strs <- NULL;
      for (pp in seq_len(nrow(value))) {
        valuePP <- value[1,,drop=FALSE];
        value <- value[-1,,drop=FALSE];
        if (pp == 1) {
          strs <- valuePP;
        } else {
          strs <- paste(strs, valuePP, sep="");
        }
        valuePP <- NULL; # Not needed anymore
      }
      value <- strs;
      strs <- NULL; # Not needed anymore
    } else {
      ccs <- 1:size;
      value <- raw[ccs,,drop=FALSE];
      raw <- raw[-ccs,,drop=FALSE];
      value <- readBin(con=value, what=what, size=size, signed=signed, endian="big", n=nbrOfRows);
    }

    table[[cc]] <- value;
  } # for (cc ...)

  # Turn into a data frame
  attr(table, "row.names") <- .set_row_names(length(table[[1]]));
  attr(table, "names") <- names;
  class(table) <- "data.frame";

  dataSet$table <- table;

  dataSet;
} # .readCcgDataSet()


############################################################################
# HISTORY:
# 2012-05-18
# o Now using stop() instead of throw().
# 2011-11-01
# o CLEANUP: Changed signed=FALSE to signed=TRUE for readBin() calls
#   reading 4-byte integers in internal .readCcgDataGroupHeader() and
#   .readCcgDataSet().
# 2009-02-10
# o Added internal rawToString() replacing rawToChar() to avoid warnings
#   on "truncating string with embedded nul".
# 2008-08-23
# o SPEED UP: Removed all gc() calls.
# 2008-01-13
# o Removed dependency on intToChar() in R.utils.
# o BUG FIX/UPDATE: The file format was updated between April 2006 and
#   November 2007. More specifically, the so called "Value Types" were
#   changed/corrected. Before values 7:9 were 'DOUBLE', 'STRING', and
#   'WSTRING'. Now 7:8 are 'STRING' and 'WSTRING' and there is no longer
#   a 'DOUBLE'.
#   This was detected while trying to read a CNCHP file outputted by the
#   new Affymetrix Genotyping Console 2.0. We can now read these files.
# 2007-08-16
# o Now it is only readCcg() and readCcgHeader() that are public. The
#   other readCcgNnn() functions are renamed to .readCcgNnn().
# o Now the read data is converted according to the mime type. See internal
#   readWVT() function.
#   The code is still ad hoc, so it is not generic.
#   For instance, it basically assumes that Unicode strings only contain
#   ASCII/ASCII-8 characters.
# 2006-11-06
# o Tested on Test3-1-121502.calvin.CEL and Test3-1-121502.calvin.CDF.
# o Created.
############################################################################
# affxparser/R/readCcgHeader.R0000644000175200017520000002714014516003651016727 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCcgHeader
#
# @title "Reads the header of an Affymetrix Command Console Generic (CCG) file"
#
# @synopsis
#
# \description{
#   @get "title".
# }
#
# \arguments{
#   \item{pathname}{The pathname of the CCG file.}
#   \item{verbose}{An @integer specifying the verbose level. If 0, the
#     file is parsed quietly. The higher numbers, the more details.}
#   \item{.filter}{A @list.}
#   \item{...}{Not used.}
# }
#
# \value{
#   A named @list structure consisting of ...
# }
#
# @author "HB"
#
# \details{
#   Note, the current implementation of this method does not utilize the
#   Affymetrix Fusion SDK library. Instead, it is implemented in R from the
#   file format definition [1].
# }
#
# \seealso{
#   @see "readCcg".
# }
#
# \references{
#  [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats,
#      April, 2006.
#      \url{http://www.affymetrix.com/support/developer/}\cr
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
readCcgHeader <- function(pathname, verbose=0, .filter=list(fileHeader=TRUE, dataHeader=TRUE), ...) {
  # Argument 'pathname': either an already open connection, which is used
  # as is and left open, or the pathname of a file, which is opened here
  # and closed again when the function exits.
  if (inherits(pathname, "connection")) {
    con <- pathname
    pathname <- NA
  } else if (file.exists(pathname)) {
    con <- file(pathname, open="rb")
    on.exit(close(con))
  } else {
    stop("File not found: ", pathname)
  }

  # Argument '.filter': must be either NULL or a list
  if (!is.null(.filter) && !is.list(.filter)) {
    stop("Argument '.filter' must be a list: ", mode(.filter))
  }

  # A header is returned only if its filter element is TRUE or a list
  isRequested <- function(flt) {
    identical(flt, TRUE) || is.list(flt)
  }

  header <- list(filename=pathname)

  # Both headers are always read, also when they are not returned, so that
  # the connection is moved past them.
  fileHeader <- .readCcgFileHeader(con)
  if (isRequested(.filter$fileHeader)) {
    header$fileHeader <- fileHeader
  }

  dataHeader <- .readCcgDataHeader(con, .filter=.filter$dataHeader)
  if (isRequested(.filter$dataHeader)) {
    header$dataHeader <- dataHeader
  }

  header
} # readCcgHeader()


# File Header
# The file header section is the first section of the file. This
# section is used to identify the type of file (i.e. Command Console
# data file), its version number (for the file format) and the number
# of data groups stored within the file. Information about the contents
# of the file such as the data type identifier, the parameters used to
# create the file and its parentage is stored within the generic data
# header section.
#
# Item Description Type
# 1 Magic number. A value to identify that this is a Command Console
#   data file. The value will be fixed to 59. [UBYTE]
# 2 The version number of the file. This is the version of the file
#   format. It is currently fixed to 1. [UBYTE]
# 3 The number of data groups.
# [INT]
# 4 File position of the first data group. [UINT]

# Reads the file header of a CCG (Command Console data) file.
#
# Arguments:
#   pathname: The pathname of the file, or a connection to read from.
#     A connection opened by this function is closed on exit; a connection
#     passed in by the caller is left open.
#   .filter: NULL or a list. It is validated, but not otherwise used here.
#   ...: Not used.
#
# Returns a list with elements 'version', 'nbrOfDataGroups' and
# 'dataGroupStart'.
#
# Throws an error if the file does not exist, if '.filter' is neither NULL
# nor a list, if the header is truncated, or if the magic number is not 59.
.readCcgFileHeader <- function(pathname, .filter=NULL, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Reads 'n' big-endian integers of 'size' bytes each.  readBin() silently
  # returns fewer values when the input ends early, so check explicitly in
  # order to give a meaningful error message.
  readIntegers <- function(con, size, signed, n=1L) {
    values <- readBin(con, what=integer(), size=size, signed=signed,
                      endian="big", n=n)
    if (length(values) < n) {
      stop("File format error: Unexpected end of file while reading the CCG file header.")
    }
    values
  }

  readUByte <- function(con, n=1L, ...) {
    readIntegers(con, size=1L, signed=FALSE, n=n)
  }

  readInt <- function(con, n=1L, ...) {
    readIntegers(con, size=4L, signed=TRUE, n=n)
  }

  readUInt <- function(con, n=1L, ...) {
    # NOTE: Ideally we would use signed=FALSE here, but there is no
    # integer data type in R that can hold 4-byte unsigned integers.
    # Because of this limitation, readBin() will give a warning that
    # signed=FALSE only works for size=1 or 2.
    # WORKAROUND: Use signed=TRUE and assume there are no values
    # greater than .Machine$integer.max == 2^31-1. /HB 2015-04-15
    readIntegers(con, size=4L, signed=TRUE, n=n)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'pathname':
  if (inherits(pathname, "connection")) {
    con <- pathname
  } else {
    if (!file.exists(pathname)) {
      stop("File not found: ", pathname)
    }
    con <- file(pathname, open="rb")
    on.exit(close(con), add=TRUE)
  }

  # Argument '.filter':
  if (!is.null(.filter) && !is.list(.filter)) {
    stop("Argument '.filter' must be a list: ", mode(.filter))
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  magic <- readUByte(con)
  if (magic != 59) {
    stop("File format error: Not a CCG file. Magic is not 59: ", magic)
  }

  # The fields must be read in exactly this order
  version <- readUByte(con)
  nbrOfDataGroups <- readInt(con)
  dataGroupStart <- readUInt(con)

  list(
    version = version,
    nbrOfDataGroups = nbrOfDataGroups,
    dataGroupStart = dataGroupStart
  )
} # .readCcgFileHeader()


# Reads a CCG data header from a connection, including its parameters and,
# recursively, its parent data headers.
#
# Arguments:
#   con: A connection positioned at the start of a data header.
#   .filter: If FALSE, or non-NULL and of length zero, nothing is read and
#     NULL is returned.  Any other value means "read the header".
#   ...: Not used.
#
# Returns NULL (see '.filter'), or a list with elements 'dataTypeId',
# 'fileId', 'timestamp', 'locale', 'parameters' (a named list where each
# value carries its MIME type in attribute 'mimeType') and 'parents'
# (a list of data headers of the same structure).
#
# Throws an error if the input ends before the header is complete.
.readCcgDataHeader <- function(con, .filter=NULL, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  rawToString <- function(raw, ...) {
    # This approach drops all '\0', in order to avoid warnings
    # in rawToChar().  Note, it does not truncate the string after
    # the first '\0'.  However, such strings should never occur in
    # the first place.
    raw <- raw[raw != as.raw(0)]
    rawToChar(raw)
  }

  readInt <- function(con, n=1L, ...) {
    values <- readBin(con, what=integer(), size=4L, signed=TRUE,
                      endian="big", n=n)
    # readBin() silently returns fewer values at end of input; without this
    # check the callers fail with unrelated errors on zero-length values.
    if (length(values) < n) {
      stop("File format error: Unexpected end of file while reading the CCG data header.")
    }
    values
  }

  # A string is stored as a 4-byte length followed by that many characters
  readString <- function(con, ...) {
    nchars <- readInt(con)
    if (nchars == 0) return("")
    readChar(con, nchars=nchars)
  }

  # A wide string is stored as a 4-byte length followed by two bytes per
  # character; only the second byte of each pair is kept.
  readWString <- function(con, ...) {
    nchars <- readInt(con)
    if (nchars == 0) return("")
    raw <- readBin(con, what=raw(), n=2*nchars)
    raw <- raw[seq(from=2, to=length(raw), by=2)]
    rawToString(raw)
  }

  # A raw value is stored as a 4-byte length followed by that many bytes
  readRaw <- function(con, ...) {
    n <- readInt(con)
    if (n == 0) return(raw(0))
    readBin(con, what=raw(0), n=n)
  }

  # Reads a (name, value, type) triplet and decodes the value according to
  # its MIME type.  Values of unknown types are returned as raw bytes.
  readWVT <- function(con, ...) {
    name <- readWString(con)
    raw <- readRaw(con)
    type <- readWString(con)

    # Update data types
    # * text/x-calvin-integer-8
    # * text/x-calvin-unsigned-integer-8
    # * text/x-calvin-integer-16
    # * text/x-calvin-unsigned-integer-16
    # * text/x-calvin-integer-32
    # * text/x-calvin-unsigned-integer-32
    # * text/x-calvin-float
    # * text/plain
    n <- length(raw)
    value <- switch(type,
      "text/ascii" = {
        rawToString(raw)
      },

      "text/plain" = {
        # Unicode/UTF-16?!?
        raw <- matrix(raw, ncol=2, byrow=TRUE)
        raw <- raw[,2]
        rawToString(raw)
      },

      "text/x-calvin-integer-8" = {
        readBin(raw, what=integer(0), endian="big", size=1, signed=TRUE, n=n)
      },

      "text/x-calvin-unsigned-integer-8" = {
        readBin(raw, what=integer(0), endian="big", size=1, signed=FALSE, n=n)
      },

      "text/x-calvin-integer-16" = {
        readBin(raw, what=integer(0), endian="big", size=2, signed=TRUE, n=n/2)
      },

      "text/x-calvin-unsigned-integer-16" = {
        readBin(raw, what=integer(0), endian="big", size=2, signed=FALSE, n=n/2)
      },

      "text/x-calvin-integer-32" = {
        readBin(raw, what=integer(0), endian="big", size=4, signed=TRUE, n=n/4)
      },

      "text/x-calvin-unsigned-integer-32" = {
        # NOTE: Ideally we would use signed=FALSE here, but there is no
        # integer data type in R that can hold 4-byte unsigned integers.
        # Because of this limitation, readBin() will give a warning that
        # signed=FALSE only works for size=1 or 2.
        # WORKAROUND: Use signed=TRUE and assume there are no values
        # greater than .Machine$integer.max == 2^31-1. /HB 2015-04-15
        readBin(raw, what=integer(0), endian="big", size=4, signed=TRUE, n=n/4)
      },

      "text/x-calvin-float" = {
        readBin(raw, what=double(0), endian="big", size=4, n=n/4)
      },

      {
        raw
      }
    ) # switch()

    list(name=name, value=value, raw=raw, type=type)
  } # readWVT()

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Nothing to do?
  if (!is.null(.filter)) {
    if (identical(.filter, FALSE) || length(.filter) == 0) return(NULL)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # The fields must be read in exactly this order
  dataTypeId <- readString(con)
  fileId <- readString(con)
  timestamp <- readWString(con)
  locale <- readWString(con)
  hdr <- list(
    dataTypeId = dataTypeId,
    fileId = fileId,
    timestamp = timestamp,
    locale = locale
  )

  # Reading parameters
  nbrOfParams <- readInt(con)
  params <- vector("list", nbrOfParams)
  paramNames <- character(nbrOfParams)
  for (kk in seq_len(nbrOfParams)) {
    wvt <- readWVT(con)
    paramNames[kk] <- wvt$name
    value <- wvt$value
#    attr(value, "raw") <- wvt$raw
    attr(value, "mimeType") <- wvt$type
    params[[kk]] <- value
  }
  names(params) <- paramNames
  hdr$parameters <- params

  # Reading parent headers
  nbrOfParents <- readInt(con)
  parents <- vector("list", nbrOfParents)
  for (kk in seq_len(nbrOfParents)) {
    parents[[kk]] <- .readCcgDataHeader(con)
  }
  hdr$parents <- parents

  hdr
} # .readCcgDataHeader()


############################################################################
# HISTORY:
# 2012-05-18
# o Now using stop() instead of throw().
# 2011-11-01
# o CLEANUP: Changed signed=FALSE to signed=TRUE for readBin() calls
#   reading 4-byte integers in internal .readCcgFileHeader() and
#   .readCcgDataHeader().
# 2009-02-10
# o Added internal rawToString() replacing rawToChar() to avoid warnings
#   on "truncating string with embedded nul".
# 2007-08-16
# o Now the read data is converted according to the mime type.
See internal # readWVT() function. The code is still ad hoc, so it is not generic. # For instance, it basically assumes that Unicode strings only contain # ASCII/ASCII-8 characters. # 2006-11-06 # o Tested on Test3-1-121502.calvin.CEL and Test3-1-121502.calvin.CDF. # o Created. ############################################################################ affxparser/R/readCdf.R0000644000175200017520000003152214516003651015615 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction readCdf # # @title "Parsing a CDF file using Affymetrix Fusion SDK" # # \description{ # @get "title". # This function parses a CDF file using the Affymetrix Fusion SDK. # \emph{This function will most likely be replaced by the more # general \code{\link{readCdfUnits}()} function.} # } # # \usage{ # readCdf(filename, units=NULL, # readXY=TRUE, readBases=TRUE, # readIndexpos=TRUE, readAtoms=TRUE, # readUnitType=TRUE, readUnitDirection=TRUE, # readUnitNumber=TRUE, readUnitAtomNumbers=TRUE, # readGroupAtomNumbers=TRUE, readGroupDirection=TRUE, # readIndices=FALSE, readIsPm=FALSE, # stratifyBy=c("nothing", "pmmm", "pm", "mm"), # verbose=0) # } # # \arguments{ # \item{filename}{The filename of the CDF file.} # \item{units}{An @integer @vector of unit indices # specifying which units to be read. 
If @NULL, all units are read.} # \item{readXY}{If @TRUE, cell row and column (x,y) coordinates are # retrieved, otherwise not.} # \item{readBases}{If @TRUE, cell P and T bases are retrieved, otherwise not.} # \item{readIndexpos}{If @TRUE, cell indexpos are retrieved, otherwise not.} # \item{readExpos}{If @TRUE, cell "expos" values are retrieved, otherwise not.} # \item{readUnitType}{If @TRUE, unit types are retrieved, otherwise not.} # \item{readUnitDirection}{If @TRUE, unit directions are retrieved, otherwise not.} # \item{readUnitNumber}{If @TRUE, unit numbers are retrieved, otherwise not.} # \item{readUnitAtomNumbers}{If @TRUE, unit atom numbers are retrieved, otherwise not.} # \item{readGroupAtomNumbers}{If @TRUE, group atom numbers are retrieved, otherwise not.} # \item{readGroupDirection}{If @TRUE, group directions are retrieved, otherwise not.} # \item{readIndices}{If @TRUE, cell indices \emph{calculated} from # the row and column (x,y) coordinates are retrieved, otherwise not. # Note that these indices are \emph{one-based}.} # \item{readIsPm}{If @TRUE, cell flags indicating whether the cell # is a perfect-match (PM) probe or not are retrieved, otherwise not.} # \item{stratifyBy}{A @character string specifying which and how # elements in group fields are returned. # If \code{"nothing"}, elements are returned as is, i.e. as @vectors. # If \code{"pm"}/\code{"mm"}, only elements corresponding to # perfect-match (PM) / mismatch (MM) probes are returned (as @vectors). # If \code{"pmmm"}, elements are returned as a matrix where the # first row holds elements corresponding to PM probes and the second # corresponding to MM probes. Note that in this case, it is assumed # that there are equal number of PMs and MMs; if not, an error is # generated. # Moreover, the PMs and MMs may not even be paired, i.e. there is no # guarantee that the two elements in a column corresponds to a # PM-MM pair.} # \item{verbose}{An @integer specifying the verbose level. 
If 0, the # file is parsed quietly. The higher numbers, the more details.} # } # # \value{ # A list with one component for each unit. Every component is again a # list with three components # \item{groups}{This is again a list with one component for each group # (also called block). The information on each group is a list with 5 # components, \code{x}, \code{y}, \code{pbase}, \code{tbase}, # \code{expos}.} # \item{type}{type of the unit.} # \item{direction}{direction of the unit.} # } # # \note{ # This version of the function does not return information on the QC # probes. This will be added in a (near) future release. In addition we # expect the header to be part of the returned object. # # So expect changes to the structure of the value of the function in # next release. Please contact the developers for details. # } # # \section{Cell indices are one-based}{ # Note that in \pkg{affxparser} all \emph{cell indices} are by # convention \emph{one-based}, which is more convenient to work # with in \R. For more details on one-based indices, see # @see "2. Cell coordinates and cell indices". # } # # \author{ # James Bullard and # Kasper Daniel Hansen. # } # # \seealso{ # It is recommended to use @see "readCdfUnits" instead of this method. # @see "readCdfHeader" for getting the header of a CDF file. # } # # \references{ # [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, # June 14, 2005. 
#   \url{http://www.affymetrix.com/support/developer/}
# }
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
readCdf <- function(filename, units=NULL,
                    readXY=TRUE, readBases=TRUE,
                    readIndexpos=TRUE, readAtoms=TRUE,
                    readUnitType=TRUE, readUnitDirection=TRUE,
                    readUnitNumber=TRUE, readUnitAtomNumbers=TRUE,
                    readGroupAtomNumbers=TRUE, readGroupDirection=TRUE,
                    readIndices=FALSE, readIsPm=FALSE,
                    stratifyBy=c("nothing", "pmmm", "pm", "mm"),
                    verbose=0) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  ## Argument 'units':
  ## Validated the same way as in the other CDF readers of this package, so
  ## that invalid indices are rejected here rather than by the native code.
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (anyNA(units)) {
      stop("Argument 'units' contains missing values.")
    }
    if (any(units < 1L)) {
      stop("Argument 'units' contains non-positive indices.")
    }
  }

  ## other arguments
  stratifyBy <- match.arg(stratifyBy)
  returnUnitType <- as.logical(readUnitType)
  returnUnitDirection <- as.logical(readUnitDirection)
  returnUnitAtomNumbers <- as.logical(readUnitAtomNumbers)
  returnUnitNumber <- as.logical(readUnitNumber)
  returnXY <- as.logical(readXY)
  returnIndices <- as.logical(readIndices)
  returnBases <- as.logical(readBases)
  returnAtoms <- as.logical(readAtoms)
  returnIndexpos <- as.logical(readIndexpos)
  returnIsPm <- as.logical(readIsPm)
  returnBlockDirection <- as.logical(readGroupDirection)
  returnBlockAtomNumbers <- as.logical(readGroupAtomNumbers)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the CDF structure
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdf(..., units=integer(0)) is not supported.")
  }

  # units=NULL is passed as integer(0), i.e. as.integer(NULL)
  cdf <- .Call("R_affx_get_cdf_file",
               filename, as.integer(units), as.integer(verbose),
               returnUnitType, returnUnitDirection,
               returnUnitAtomNumbers, returnUnitNumber,
               returnXY, returnIndices,
               returnBases, returnAtoms, returnIndexpos, returnIsPm,
               returnBlockDirection, returnBlockAtomNumbers,
               PACKAGE="affxparser")

  # Sanity check
  if (is.null(cdf)) {
    stop("Failed to read CDF file: ", filename)
  }

  if (stratifyBy == "nothing") {
    return(cdf)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Stratify by PM/MM
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  isPm <- readCdfIsPm(filename, units=units)

  # Group fields that hold one value per group rather than one per cell;
  # these are set aside and never subsetted by PM/MM.
  groupLevelFields <- c("groupdirection", "natoms", "ncellsperatom")
  asPmMmPairs <- (stratifyBy == "pmmm")
  pmmmDimnames <- list(c("pm", "mm"), NULL)

  ## Using .subset2() instead of "[["() to avoid dispatching overhead etc.
  for (uu in seq_along(cdf)) {
    groups <- .subset2(.subset2(cdf, uu), "groups")
    if (length(groups) == 0L) next

    for (gg in seq_along(groups)) {
      group <- .subset2(groups, gg)

      # Set aside the group-level fields
      groupLevelNames <- intersect(groupLevelFields, names(group))
      hasGroupLevel <- (length(groupLevelNames) > 0L)
      if (hasGroupLevel) {
        groupLevel <- group[groupLevelNames]
        group <- group[setdiff(names(group), groupLevelFields)]
      }

      # For "pm" and "mm", a group without cell fields is left exactly as
      # it was read.
      nbrOfCellFields <- length(group)
      if (!asPmMmPairs && nbrOfCellFields == 0L) next

      # Identify the cells to keep and their order
      pm <- .subset2(.subset2(isPm, uu), gg)
      idx <- seq_along(pm)
      if (asPmMmPairs) {
        mmIdxs <- idx[!pm]  # Note: which(!pm) is about 60% slower! /HB
        pmIdxs <- idx[pm]
        npm <- length(pmIdxs)
        if (npm != length(mmIdxs)) {
          ## This is not expected to happen, but just in case.
          stop("Number of PM and MM probes differ in probeset #", uu,
               ": ", length(pmIdxs), " != ", length(mmIdxs))
        }
        # First row holds the PM cells, second row the MM cells
        cells <- matrix(c(pmIdxs, mmIdxs), nrow=2L, ncol=npm, byrow=TRUE)
      } else if (stratifyBy == "pm") {
        cells <- idx[pm]
      } else {
        cells <- idx[!pm]
      }

      # Subset/reorder each cell field
      for (kk in seq_len(nbrOfCellFields)) {
        value <- .subset(.subset2(group, kk), cells)
        if (asPmMmPairs) {
          # Subsetting a vector drops the matrix shape; restore it
          dim(value) <- c(2L, npm)
          dimnames(value) <- pmmmDimnames
        }
        group[[kk]] <- value
      }

      # Put the group-level fields back (at the end)
      if (hasGroupLevel) {
        group <- c(group, groupLevel)
      }

      groups[[gg]] <- group
    } # for (gg ...)
    cdf[[uu]]$groups <- groups
  } # for (uu ...)

  cdf
} # readCdf()


############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2011-02-15
# o DOCUMENTATION: Converted the Rd to an Rdoc comment in this file.
# 2010-12-12
# o ROBUSTNESS: Replaces .Internal(matrix(...)) with matrix().
#   In the upcoming R 2.13.0 matrix() has less overhead.
############################################################################ affxparser/R/readCdfCellIndices.R0000644000175200017520000002051214516003651017711 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCdfCellIndices
#
# @title "Reads (one-based) cell indices of units (probesets) in an Affymetrix CDF file"
#
# @synopsis
#
# \description{
#  @get "title".
# }
#
# \arguments{
#  \item{filename}{The filename of the CDF file.}
#  \item{units}{An @integer @vector of unit indices
#    specifying which units to be read.  If @NULL, all units are read.}
#  \item{stratifyBy}{A \code{\link[base]{character}} string specifying which and how
#    elements in group fields are returned.
#    If \code{"nothing"}, elements are returned as is, i.e. as @vectors.
#    If \code{"pm"}/\code{"mm"}, only elements corresponding to
#    perfect-match (PM) / mismatch (MM) probes are returned (as @vectors).
#    If \code{"pmmm"}, elements are returned as a matrix where the
#    first row holds elements corresponding to PM probes and the second
#    corresponding to MM probes.  Note that in this case, it is assumed
#    that there are equal number of PMs and MMs; if not, an error is
#    generated.
#    Moreover, the PMs and MMs may not even be paired, i.e.
there is no # guarantee that the two elements in a column correspond to a # PM-MM pair.} # \item{verbose}{An @integer specifying the verbose level. If 0, the # file is parsed quietly. The higher the number, the more details.} # } # # \value{ # A named @list where the names correspond to the names # of the units read. Each unit element of the list is in turn a # @list structure with one element \code{groups} which in turn # is a @list. Each group element in \code{groups} is a @list # with a single field named \code{indices}. Thus, the structure is # \preformatted{ # cdf # +- unit #1 # | +- "groups" # | +- group #1 # | | +- "indices" # | +- group #2 # | | +- "indices" # | . # | +- group #K # | +- "indices" # +- unit #2 # . # +- unit #J # } # # This structure is compatible with what @see "readCdfUnits" returns. # # Note that these indices are \emph{one-based}. # } # # \section{Cell indices are one-based}{ # Note that in \pkg{affxparser} all \emph{cell indices} are by # convention \emph{one-based}, which is more convenient to work # with in \R. For more details on one-based indices, see # @see "2. Cell coordinates and cell indices". # } # # @author "HB" # # \seealso{ # @see "readCdfUnits". 
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
readCdfCellIndices <- function(filename, units=NULL,
                               stratifyBy=c("nothing", "pmmm", "pm", "mm"),
                               verbose=0) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  # Argument 'units':
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (any(units < 1)) {
      stop("Argument 'units' contains non-positive indices.")
    }
  }

  # Argument 'verbose':
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.")
  }
  verbose <- as.integer(verbose)
  if (!is.finite(verbose)) {
    stop("Argument 'verbose' must be an integer: ", verbose)
  }

  # Argument 'stratifyBy':
  stratifyBy <- match.arg(stratifyBy)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?
  emptySubset <- (!is.null(units) && length(units) == 0L)
  if (emptySubset) {
    stop("readCdfCellIndices(..., units=integer(0)) is not supported.")
  }

  cdf <- .Call("R_affx_get_cdf_cell_indices", filename, units, verbose,
               PACKAGE="affxparser")

  # Sanity check
  if (is.null(cdf)) {
    stop("Failed to read cell indices from CDF file: ", filename)
  }

  if (stratifyBy == "nothing") {
    return(cdf)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Stratify by PM, MM, or PM & MM
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  isPm <- readCdfIsPm(filename, units=units)

  pairPmMm <- (stratifyBy == "pmmm")
  keepPm <- (stratifyBy == "pm")
  pmmmDimnames <- list(c("pm", "mm"), NULL)

  # Using .subset2() instead of "[["() to avoid dispatching overhead etc.
  for (uu in seq_along(cdf)) {
    unitGroups <- .subset2(.subset2(cdf, uu), "groups")
    if (length(unitGroups) == 0) next

    for (gg in seq_along(unitGroups)) {
      fields <- .subset2(unitGroups, gg)

      if (pairPmMm) {
        # Return the first field as a 2-by-K matrix with the PM cells in
        # the first row and the MM cells in the second.
        isPmCell <- .subset2(.subset2(isPm, uu), gg)
        cellIdxs <- seq_along(isPmCell)
        mmIdxs <- cellIdxs[!isPmCell]  # Note: which(!pm) is about 60% slower! /HB
        pmIdxs <- cellIdxs[isPmCell]
        nbrOfPms <- length(pmIdxs)
        if (nbrOfPms != length(mmIdxs)) {
          # This is not expected to happen, but just in case.
          stop("Number of PM and MM probes differ in probeset #", uu,
               ": ", length(pmIdxs), " != ", length(mmIdxs))
        }
        cellOrder <- matrix(c(pmIdxs, mmIdxs), nrow=2L, ncol=nbrOfPms,
                            byrow=TRUE)

        # Re-order cell elements according to PM/MM.
        reordered <- .subset(.subset2(fields, 1), cellOrder)
        dim(reordered) <- c(2, nbrOfPms)
        dimnames(reordered) <- pmmmDimnames
        fields[[1]] <- reordered
      } else {
        # Keep only the PM cells, or only the MM cells, of every field
        nbrOfFields <- length(fields)
        if (nbrOfFields == 0) next

        isPmCell <- .subset2(.subset2(isPm, uu), gg)
        cellIdxs <- seq_along(isPmCell)
        if (keepPm) {
          keep <- cellIdxs[isPmCell]   # Note: which(pm) is about 60% slower!
        } else {
          keep <- cellIdxs[!isPmCell]  # Note: which(!pm) is about 60% slower!
        }
        for (kk in seq_len(nbrOfFields)) {
          fields[[kk]] <- .subset(.subset2(fields, kk), keep)
        }
      }

      unitGroups[[gg]] <- fields
    } # for (gg ...)
    cdf[[uu]]$groups <- unitGroups
  } # for (uu ...)

  cdf
} # readCdfCellIndices()


############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2010-12-12
# o ROBUSTNESS: Replaces .Internal(matrix(...)) with matrix().
#   In the upcoming R 2.13.0 matrix() has less overhead.
# 2006-12-10
# o BUG FIX: Same stratifyBy="mm" bug here as in readCdfUnits().
# 2006-07-22
# o Making more use of .subset2().
# 2006-05-20
# o Rd fix: The \value{} was incorrect.
# 2006-05-12
# o Removed argument 'flat'.  Will not be used for a while.  The intention
#   was to remove the redundant levels of "groups" and possibly also the
#   "indices" level.  That would most likely speed up things a bit, but it
#   would also require that readCelUnits() understand this other format too.
# 2006-04-01
# o Created, because it is very commonly used and is about 5 times faster
#   than using readCdfUnits(..., readIndices=TRUE). /HB
############################################################################ affxparser/R/readCdfDataFrame.R0000644000175200017520000002560014516003651017362 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCdfDataFrame
#
# @title "Reads units (probesets) from an Affymetrix CDF file"
#
# @synopsis
#
# \description{
#  @get "title".
Gets all or a subset of units (probesets). # } # # \arguments{ # \item{filename}{The filename of the CDF file.} # \item{units}{An @integer @vector of unit indices # specifying which units to be read. If @NULL, all are read.} # \item{groups}{An @integer @vector of group indices # specifying which groups to be read. If @NULL, all are read.} # \item{cells}{An @integer @vector of cell indices # specifying which cells to be read. If @NULL, all are read.} # \item{fields}{A @character @vector specifying what fields to read. # If @NULL, all unit, group and cell fields are returned.} # \item{drop}{If @TRUE and only one field is read, then a @vector # (rather than a single-column @data.frame) is returned.} # \item{verbose}{An @integer specifying the verbose level. If 0, the # file is parsed quietly. The higher numbers, the more details.} # } # # \value{ # An NxK @data.frame or a @vector of length N. # } # # @author "HB" # # @examples "../incl/readCdfDataFrame.Rex" # # \seealso{ # For retrieving the CDF as a @list structure, see # @see "affxparser::readCdfUnits". # } # # \references{ # [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, # June 14, 2005. 
#   \url{http://www.affymetrix.com/support/developer/}
# }
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
readCdfDataFrame <- function(filename, units=NULL, groups=NULL, cells=NULL,
                             fields=NULL, drop=TRUE, verbose=0) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  # Argument 'units':
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (any(units < 1)) {
      stop("Argument 'units' contains non-positive indices.")
    }
  }

  # Argument 'groups':
  if (!is.null(groups)) {
    if (!is.numeric(groups)) {
      stop("Argument 'groups' must be numeric or NULL: ", class(groups)[1])
    }
    groups <- as.integer(groups)
    if (any(groups < 1)) {
      stop("Argument 'groups' contains non-positive indices.")
    }
  }

  # Argument 'fields':
##  knownUnitFields <- c("unit", "unitName", "unitDirection", "nbrOfUnitAtoms", "unitSize", "unitNumber", "unitType", "nbrOfGroups", "mutationType");
##  knownGroupFields <- c("group", "groupName", "nbrOfGroupAtoms", "groupSize", "firstAtom", "lastAtom", "groupDirection");
##  knownCellFields <- c("cell", "x", "y", "probeSequence", "feat", "qual", "expos", "pos", "cbase", "pbase", "tbase", "atom", "index");
  if (is.null(fields)) {
    knownUnitFields <- c("unit", "unitName", "unitType", "unitDirection",
                         "unitNbrOfAtoms")
    knownGroupFields <- c("group", "groupName", "groupDirection",
                          "groupNbrOfAtoms")
    knownCellFields <- c("cell", "x", "y", "pbase", "tbase", "indexPos",
                         "atom", "expos")
    fields <- c(knownUnitFields, knownGroupFields, knownCellFields)
  }

  # Argument 'verbose':
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.")
  }
  verbose <- as.integer(verbose)
  if (!is.finite(verbose)) {
    stop("Argument 'verbose' must be an integer: ", verbose)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Prepare the arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  readFields <- c(fields, "cell")  # Need to read one cell field!
  readFields <- unique(readFields)

  # Unit fields
  readUnitType <- ("unitType" %in% readFields)
  readUnitDirection <- ("unitDirection" %in% readFields)
  readUnitNumber <- ("unitNumber" %in% readFields)
  readUnitAtomNumbers <- ("unitNbrOfAtoms" %in% readFields)

  # Group fields
  readGroupDirection <- ("groupDirection" %in% readFields)
  readGroupAtomNumbers <- ("groupNbrOfAtoms" %in% readFields)

  # Cell fields
  # (There is no separate switch for "expos" in the readCdf() call below.)
  readXY <- any(c("x", "y") %in% readFields)
  readIndices <- ("cell" %in% readFields)
  readBases <- any(c("tbase", "pbase") %in% readFields)
  readIndexpos <- ("indexPos" %in% readFields)
  readAtoms <- ("atom" %in% readFields)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Query the CDF
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdfDataFrame(..., units=integer(0)) is not supported.")
  }

  cdf <- readCdf(filename, units=units,
                 readXY=readXY, readBases=readBases,
                 readIndexpos=readIndexpos, readAtoms=readAtoms,
                 readUnitType=readUnitType,
                 readUnitDirection=readUnitDirection,
                 readUnitNumber=readUnitNumber,
                 readUnitAtomNumbers=readUnitAtomNumbers,
                 readGroupAtomNumbers=readGroupAtomNumbers,
                 readGroupDirection=readGroupDirection,
                 readIndices=readIndices, verbose=verbose-1)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Flatten CDF list structure unit by unit
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (is.null(units)) {
    units <- seq_along(cdf)
  }

  ## FIX ME
  groupIdxs <- groups

  unitNames <- names(cdf)
  for (uu in seq_along(cdf)) {
    unit <- .subset2(cdf, uu)
    unitName <- .subset(unitNames, uu)

    if (verbose >= 1) {
      if (uu %% 500 == 1) {
        unitsLeft <- length(cdf) - uu + 1
        cat(unitsLeft, ", ", sep="")
      }
    }

    groups <- .subset2(unit, "groups")
    unit[["groups"]] <- NULL

    # Translate unit names (has to be done here because not unique)
    unitFieldNames <- names(unit)
    unitFieldNames <- sub("ncells", "unitNbrOfCells", unitFieldNames)
    unitFieldNames <- sub("natoms", "unitNbrOfAtoms", unitFieldNames)
    unitFieldNames <- sub("unitnumber", "unitNumber", unitFieldNames)
    names(unit) <- unitFieldNames

    unitData <- list(unit=.subset(units, uu), unitName=unitName)
    unitData <- c(unitData, unit)

    # Extract groups of interest?
    if (is.null(groupIdxs)) {
      ggs <- seq_along(groups)
    } else {
      keep <- which(seq_along(groups) %in% groupIdxs)
      groups <- .subset(groups, keep)
      # Label each retained group with its own index in the unit.  Using
      # the requested indices instead would mislabel groups when they are
      # not sorted, and would index past the retained groups when they are
      # duplicated or exceed the number of groups in this unit.
      ggs <- keep
    }

    # Flatten (group, cell) data
    groupNames <- names(groups)
    for (gg in seq_along(ggs)) {
      group <- .subset2(groups, gg)
      groupName <- .subset(groupNames, gg)
      groupData <- list(group=.subset(ggs, gg), groupName=groupName)

      # Extract group fields
      keys <- c("groupdirection", "natoms", "ncellsperatom", "ncells")
      idxs <- which(names(group) %in% keys)
      if (length(idxs) > 0) {
        groupData <- c(groupData, .subset(group, idxs))
        group <- .subset(group, -idxs)
      }

      # Extract cell fields
      cellData <- as.data.frame(group, stringsAsFactors=FALSE)

      # Extract cells of interest?
      if (!is.null(cells)) {
        keep <- (seq_len(nrow(cellData)) %in% cells)
        cellData <- cellData[keep,,drop=FALSE]
      }

      # Expand group fields
      nbrOfCells <- nrow(cellData)
      for (key in names(groupData)) {
        groupData[[key]] <- rep(.subset2(groupData, key), times=nbrOfCells)
      }
      groupData <- as.data.frame(groupData, stringsAsFactors=FALSE)

      group <- cbind(groupData, cellData)

      groups[[gg]] <- group
    }

    # Stack (rbind) groups
    stackedGroups <- NULL
    for (gg in seq_along(groups)) {
      stackedGroups <- rbind(stackedGroups, .subset2(groups, gg))
    }

    # Expand unit fields
    nbrOfCells <- nrow(stackedGroups)
    for (key in names(unitData)) {
      unitData[[key]] <- rep(.subset2(unitData, key), times=nbrOfCells)
    }
    unitData <- as.data.frame(unitData, stringsAsFactors=FALSE)

    unit <- cbind(unitData, stackedGroups)

    cdf[[uu]] <- unit
  } # for (uu ...)
  if (verbose >= 1) {
    cat("0.\n")
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Flatten the remaining list structure
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Allocate "data frame" of right size
  unitSizes <- vapply(cdf, FUN=nrow, FUN.VALUE=integer(1L))
  nbrOfCells <- sum(unitSizes)

  # The column types are taken from the first unit
  dataTypes <- vapply(.subset2(cdf, 1), FUN=storage.mode,
                      FUN.VALUE=character(1L))
  df <- vector("list", length(dataTypes))
  names(df) <- names(dataTypes)
  for (key in names(df)) {
    df[[key]] <- vector(.subset2(dataTypes, key), length=nbrOfCells)
  }

  # Copy values from the CDF list structure
  offset <- 0
  for (uu in seq_along(cdf)) {
    data <- .subset2(cdf, uu)
    nbrOfRows <- nrow(data)
    # seq_len() rather than 1:n, so that a unit with zero rows copies nothing
    idxs <- offset + seq_len(nbrOfRows)
    for (key in names(df)) {
      df[[key]][idxs] <- .subset2(data, key)
    }
    offset <- offset + nbrOfRows
    # Release the unit that has been copied
    cdf[[uu]] <- NA
  }

  colNames <- names(df)

  # Translate unit names
  colNames <- sub("unittype", "unitType", colNames)
  colNames <- sub("unitdirection", "unitDirection", colNames)
  colNames <- sub("ncellsperatom", "unitNbrOfCellsPerAtom", colNames)

  # Translate group names
  colNames <- sub("groupdirection", "groupDirection", colNames)
  colNames <- sub("natoms", "groupNbrOfAtoms", colNames)
  colNames <- sub("ncellsperatom", "groupNbrOfCellsPerAtom", colNames)
  colNames <- sub("ncells", "groupNbrOfCells", colNames)

  # Translate cell names
  colNames <- sub("indices", "cell", colNames)
  colNames <- sub("indexpos", "indexPos", colNames)

  names(df) <- colNames

  # Extract fields of interest
  unknown <- setdiff(fields, names(df))
  if (length(unknown) > 0) {
    warning("Some of the fields were not read: ",
            paste(unknown, collapse=", "))
  }
  fields <- intersect(fields, names(df))
  df <- .subset(df, fields)

  # Make it a valid data frame
  if (drop && length(df) == 1) {
    df <- .subset2(df, 1)
  } else {
    attr(df, "row.names") <- .set_row_names(nbrOfCells)
    attr(df, "class") <- "data.frame"
  }

  df
} # readCdfDataFrame()


############################################################################
# HISTORY:
# 2008-04-03 [HB]
# o Now the renaming of fields is mostly done at
the end and not in every # iteration. That speeds up the process 5-10%. # o Replaced all [[() and [() with .subset2() and .subset(), respectively. # This should make it a little bit faster. # o For my record, some benchmarking on readCdfDataFrame(): # Mapping10K_Xba142.cdf: 408,400x16, 23.3Mb, 610 seconds. # 2008-03-24 [HB] # o Created a readCdfDataFrame() that uses readCdf() to read the data and # then restructures it. This will be used as a reference for the final # implementation. # o Created a stub for readCdfDataFrame(). ############################################################################ affxparser/R/readCdfGroupNames.R0000644000175200017520000000634314516003651017621 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction readCdfGroupNames # # @title "Reads group names for a set of units (probesets) in an Affymetrix CDF file" # # @synopsis # # \description{ # @get "title". # # This is for instance useful for SNP arrays where the nucleotides used # for the A and B alleles are the same as the group names. # } # # \arguments{ # \item{filename}{The filename of the CDF file.} # \item{units}{An @integer @vector of unit indices specifying which # units to be read. If @NULL, all units are read.} # \item{truncateGroupNames}{A @logical variable indicating whether unit # names should be stripped from the beginning of group names.} # \item{verbose}{An @integer specifying the verbose level. If 0, the # file is parsed quietly. The higher numbers, the more details.} # } # # \value{ # A named @list structure where the names of the elements are the names # of the units read. Each element is a @character @vector with group # names for the corresponding unit. # } # # @author "HB" # # \seealso{ # @see "readCdfUnits". 
# }
#
#
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
readCdfGroupNames <- function(filename, units=NULL, truncateGroupNames=TRUE, verbose=0) {
  # Reads the group names of the requested units by calling the native
  # routine "R_affx_cdf_groupNames".  Every argument is validated in R
  # before the native call is made.

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  # Rebuild the pathname from its directory and base name before testing it.
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  # Argument 'units':
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (any(units < 1, na.rm=TRUE)) {
      stop("Argument 'units' contains non-positive indices.")
    }
    # Without this test a missing index turned the range test above into
    # 'if (NA)', i.e. "missing value where TRUE/FALSE needed".
    if (any(is.na(units))) {
      stop("Argument 'units' contains NAs.")
    }
  }

  # Argument 'truncateGroupNames':
  # The flag is handed to native code, so insist on one non-missing logical.
  truncateGroupNames <- as.logical(truncateGroupNames)
  if (length(truncateGroupNames) != 1 || is.na(truncateGroupNames)) {
    stop("Argument 'truncateGroupNames' must be a single TRUE or FALSE.")
  }

  # Argument 'verbose':
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.")
  }
  verbose <- as.integer(verbose)
  if (!is.finite(verbose)) {
    stop("Argument 'verbose' must be an integer: ", verbose)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?  An empty (but non-NULL) selection is rejected.
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdfGroupNames(..., units=integer(0)) is not supported.")
  }

  res <- .Call("R_affx_cdf_groupNames", filename, units,
               truncateGroupNames, verbose, PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(res)) {
    stop("Failed to read unit group names from CDF file: ", filename)
  }

  res
}

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2007-03-05
# o Added argument truncateGroupNames. Also see R_affx_cdf_group_names(). /KS
# 2006-03-28
# o Unit indices are now one-based. /HB
# 2006-01-12
# o Created.
# /HB
############################################################################
# affxparser/R/readCdfHeader.R0000644000175200017520000000223514516003651016725 0ustar00biocbuildbiocbuild
readCdfHeader <- function(filename) {
  # Reads the header of a CDF file through the native routine
  # "R_affx_get_cdf_file_header" and returns it as a list, extended with
  # legacy field aliases and a chip type stripped of any directory part.

  # Expand '~' pathnames to full pathnames.
  pathname <- file.path(dirname(filename), basename(filename))
  if (!file.exists(pathname)) {
    stop("Cannot read CDF header. File not found: ", pathname)
  }

  header <- .Call("R_affx_get_cdf_file_header", pathname,
                  PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(header)) {
    stop("Failed to read CDF file header: ", pathname)
  }

  # Legacy aliases, kept for backward compatibility. /HB 2006-09-07
  header$rows <- header$nrows
  header$cols <- header$ncols
  header$probesets <- header$nunits
  header$qcprobesets <- header$nqcunits
  header$reference <- header$refseq

  # Workaround for a bug in Fusion SDK. /HB 2008-12-04
  # If the CDF file is on a Windows share, the chip type inferred from the
  # pathname contains a path as well, so only its base name is kept.
  # Reference: See aroma.affymetrix thread on Dec 4, 2008.
  header$chiptype <- basename(header$chiptype)

  header
} # readCdfHeader()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
############################################################################
# affxparser/R/readCdfIsPm.R0000644000175200017520000000524014516003651016404 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCdfIsPm
#
# @title "Checks if cells in a CDF file are perfect-match probes or not"
#
# @synopsis
#
# \description{
#  @get "title".
# }
#
# \arguments{
#  \item{filename}{The filename of the CDF file.}
#  \item{units}{An @integer @vector of unit indices specifying which units
#   to be read.  If @NULL, all units are read.}
#  \item{verbose}{An @integer specifying the verbose level. If 0, the
#   file is parsed quietly.
#   The higher numbers, the more details.}
# }
#
# \value{
#   A named @list of named @logical vectors.  The name of the list elements
#   are unit names and the names of the logical vector are group names.
# }
#
# @author "HB"
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
readCdfIsPm <- function(filename, units=NULL, verbose=0) {
  # Asks the native routine "R_affx_cdf_isPm" which cells of the requested
  # units are perfect-match probes.  Every argument is validated in R before
  # the native call is made.

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  # Rebuild the pathname from its directory and base name before testing it.
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  # Argument 'units':
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (any(units < 1, na.rm=TRUE)) {
      stop("Argument 'units' contains non-positive indices.")
    }
    # Without this test a missing index turned the range test above into
    # 'if (NA)', i.e. "missing value where TRUE/FALSE needed".
    if (any(is.na(units))) {
      stop("Argument 'units' contains NAs.")
    }
  }

  # Argument 'verbose':
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.")
  }
  verbose <- as.integer(verbose)
  if (!is.finite(verbose)) {
    stop("Argument 'verbose' must be an integer: ", verbose)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?  An empty (but non-NULL) selection is rejected.
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdfIsPm(..., units=integer(0)) is not supported.")
  }

  res <- .Call("R_affx_cdf_isPm", filename, units, verbose,
               PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(res)) {
    stop("Failed to read PM information from CDF file: ", filename)
  }

  res
}

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2006-05-12
# o Added Rdoc comments (converted from Rd).
# 2006-03-28
# o Unit indices are now one-based.
# /HB
# 2006-01-11
# o Created. /HB
############################################################################
# affxparser/R/readCdfNbrOfCellsPerUnitGroup.R0000644000175200017520000000562614516003651022061 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCdfNbrOfCellsPerUnitGroup
#
# @title "Gets the number of cells (probes) in each group of each unit in a CDF file"
#
# @synopsis
#
# \description{
#  @get "title".
# }
#
# \arguments{
#  \item{filename}{The filename of the CDF file.}
#  \item{units}{An @integer @vector of unit indices specifying which units
#   to be read.  If @NULL, all units are read.}
#  \item{verbose}{An @integer specifying the verbose level. If 0, the
#   file is parsed quietly.  The higher numbers, the more details.}
# }
#
# \value{
#  A named @list of named @integer vectors.  The name of the list elements
#  are unit names and the names of the integer vector are group names.
# }
#
# @examples "../incl/readCdfNbrOfCellsPerUnitGroup.Rex"
#
# @author "HB"
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
readCdfNbrOfCellsPerUnitGroup <- function(filename, units=NULL, verbose=0) {
  # Validates the arguments in R and then calls the native routine
  # "R_affx_cdf_nbrOfCellsPerUnitGroup".

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  # Rebuild the pathname from its directory and base name before testing it.
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  # Argument 'units':
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (any(units < 1, na.rm=TRUE)) {
      stop("Argument 'units' contains non-positive indices.")
    }
    # Without this test a missing index turned the range test above into
    # 'if (NA)', i.e. "missing value where TRUE/FALSE needed".
    if (any(is.na(units))) {
      stop("Argument 'units' contains NAs.")
    }
  }

  # Argument 'verbose':
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.")
  }
  verbose <- as.integer(verbose)
  if (!is.finite(verbose)) {
    stop("Argument 'verbose' must be an integer: ", verbose)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?  An empty (but non-NULL) selection is rejected.
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdfNbrOfCellsPerUnitGroup(..., units=integer(0)) is not supported.")
  }

  res <- .Call("R_affx_cdf_nbrOfCellsPerUnitGroup", filename, units, verbose,
               PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(res)) {
    stop("Failed to read number of cells per unit group from CDF file: ", filename)
  }

  res
} # readCdfNbrOfCellsPerUnitGroup()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2006-05-12
# o Added Rdoc comments (converted from Rd).
# 2006-03-28
# o Unit indices are now one-based. /HB
# 2006-01-11
# o Created. /HB
############################################################################
# affxparser/R/readCdfQc.R0000644000175200017520000000217214516003651016100 0ustar00biocbuildbiocbuild
readCdfQc <- function(filename, units = NULL, verbose = 0) {
  # Reads QC units from a CDF file through the native routine
  # "R_affx_get_cdf_file_qc", always requesting every available field.

  # Rebuild the pathname from its directory and base name before testing it.
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    # The separators were missing, which glued the pathname to the text.
    stop("file: ", filename, " does not exist.")
  }

  # All fields are requested; these flags are not exposed as arguments.
  returnIndices <- TRUE
  returnXY <- TRUE
  returnLength <- TRUE
  returnPMInfo <- TRUE
  returnBackgroundInfo <- TRUE
  returnType <- TRUE
  returnQcNumbers <- TRUE

  # UNSUPPORTED CASE?  An empty (but non-NULL) selection is rejected.
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdfQc(..., units=integer(0)) is not supported.")
  }

  # PACKAGE= restricts the symbol lookup to this package, as done by the
  # other readers in this package.
  res <- .Call("R_affx_get_cdf_file_qc", filename,
               as.integer(units), as.integer(verbose),
               returnIndices, returnXY, returnLength,
               returnPMInfo, returnBackgroundInfo,
               returnType, returnQcNumbers,
               PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(res)) {
    stop("Failed to read QC units from CDF file: ", filename)
  }

  res
} # readCdfQc()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
############################################################################
# affxparser/R/readCdfUnitNames.R0000644000175200017520000000337314516003651017444 0ustar00biocbuildbiocbuild
readCdfUnitNames <- function(filename, units=NULL, verbose=0) {
  # Reads the names of the requested units through the native routine
  # "R_affx_get_cdf_unit_names" after validating the arguments in R.

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  # Rebuild the pathname from its directory and base name before testing it.
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  # Argument 'units':
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (any(units < 1, na.rm=TRUE)) {
      stop("Argument 'units' contains non-positive indices.")
    }
    # Without this test a missing index turned the range test above into
    # 'if (NA)', i.e. "missing value where TRUE/FALSE needed".
    if (any(is.na(units))) {
      stop("Argument 'units' contains NAs.")
    }
  }

  # Argument 'verbose':
  # (These two messages used to name 'units' instead of 'verbose'.)
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.")
  }
  verbose <- as.integer(verbose)
  if (!is.finite(verbose)) {
    stop("Argument 'verbose' must be an integer: ", verbose)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?  An empty (but non-NULL) selection is rejected.
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdfUnitNames(..., units=integer(0)) is not supported.")
  }

  res <- .Call("R_affx_get_cdf_unit_names", filename, units, verbose,
               PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(res)) {
    stop("Failed to read unit names from CDF file: ", filename)
  }

  res
} # readCdfUnitNames()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2006-03-28
# o Unit indices are now one-based. /HB
############################################################################
# affxparser/R/readCdfUnits.R0000644000175200017520000002523514516003651016644 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCdfUnits
#
# @title "Reads units (probesets) from an Affymetrix CDF file"
#
# @synopsis
#
# \description{
#  @get "title". Gets all or a subset of units (probesets).
# }
#
# \arguments{
#  \item{filename}{The filename of the CDF file.}
#  \item{units}{An @integer @vector of unit indices
#    specifying which units to be read.  If @NULL, all units are read.}
#  \item{readXY}{If @TRUE, cell row and column (x,y) coordinates are
#     retrieved, otherwise not.}
#  \item{readBases}{If @TRUE, cell P and T bases are retrieved, otherwise not.}
#  \item{readExpos}{If @TRUE, cell "expos" values are retrieved, otherwise not.}
#  \item{readType}{If @TRUE, unit types are retrieved, otherwise not.}
#  \item{readDirection}{If @TRUE, unit \emph{and} group directions are
#     retrieved, otherwise not.}
#  \item{stratifyBy}{A @character string specifying which and how
#    elements in group fields are returned.
#    If \code{"nothing"}, elements are returned as is, i.e. as @vectors.
#    If \code{"pm"}/\code{"mm"}, only elements corresponding to
#    perfect-match (PM) / mismatch (MM) probes are returned (as @vectors).
# If \code{"pmmm"}, elements are returned as a matrix where the # first row holds elements corresponding to PM probes and the second # corresponding to MM probes. Note that in this case, it is assumed # that there are equal number of PMs and MMs; if not, an error is # generated. # Moreover, the PMs and MMs may not even be paired, i.e. there is no # guarantee that the two elements in a column corresponds to a # PM-MM pair.} # \item{readIndices}{If @TRUE, cell indices \emph{calculated} from # the row and column (x,y) coordinates are retrieved, otherwise not. # Note that these indices are \emph{one-based}.} # \item{verbose}{An @integer specifying the verbose level. If 0, the # file is parsed quietly. The higher numbers, the more details.} # } # # \value{ # A named @list where the names corresponds to the names # of the units read. Each element of the list is in turn a # @list structure with three components: # \item{groups}{A @list with one component for each group # (also called block). The information on each group is a # @list of up to seven components: \code{x}, \code{y}, # \code{pbase}, \code{tbase}, \code{expos}, \code{indices}, # and \code{direction}. # All fields but the latter have the same number of values as # there are cells in the group. The latter field has only # one value indicating the direction for the whole group. # } # \item{type}{An @integer specifying the type of the # unit, where 1 is "expression", 2 is "genotyping", 3 is "CustomSeq", # and 4 "tag".} # \item{direction}{An @integer specifying the direction # of the unit, which defines if the probes are interrogating the sense # or the anti-sense target, where 0 is "no direction", 1 is "sense", and # 2 is "anti-sense".} # } # # \section{Cell indices are one-based}{ # Note that in \pkg{affxparser} all \emph{cell indices} are by # convention \emph{one-based}, which is more convenient to work # with in \R. For more details on one-based indices, see # @see "2. Cell coordinates and cell indices". 
# }
#
# \author{
#  James Bullard and Kasper Daniel Hansen.
#  Modified by Henrik Bengtsson to read any subset of units and/or subset of
#  parameters, to stratify by PM/MM, and to return cell indices.
# }
#
# @examples "../incl/readCdfUnits.Rex"
#
# \seealso{
#   @see "readCdfCellIndices".
# }
#
# \references{
#   [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats,
#       June 14, 2005.
#       \url{http://www.affymetrix.com/support/developer/}
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
readCdfUnits <- function(filename, units=NULL, readXY=TRUE, readBases=TRUE, readExpos=TRUE, readType=TRUE, readDirection=TRUE, stratifyBy=c("nothing", "pmmm", "pm", "mm"), readIndices=FALSE, verbose=0) {
  # Reads units through the native routine "R_affx_get_cdf_units" and,
  # unless stratifyBy is "nothing", re-arranges the per-cell group fields
  # according to the PM/MM status reported by readCdfIsPm().

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  # Rebuild the pathname from its directory and base name before testing it.
  filename <- file.path(dirname(filename), basename(filename))
  if (!file.exists(filename)) {
    stop("File not found: ", filename)
  }

  # Argument 'units':
  if (!is.null(units)) {
    if (!is.numeric(units)) {
      stop("Argument 'units' must be numeric or NULL: ", class(units)[1])
    }
    units <- as.integer(units)
    if (any(units < 1, na.rm=TRUE)) {
      stop("Argument 'units' contains non-positive indices.")
    }
    # Without this test a missing index turned the range test above into
    # 'if (NA)', i.e. "missing value where TRUE/FALSE needed".
    if (any(is.na(units))) {
      stop("Argument 'units' contains NAs.")
    }
  }

  # Argument 'verbose':
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.")
  }
  verbose <- as.integer(verbose)
  if (!is.finite(verbose)) {
    stop("Argument 'verbose' must be an integer: ", verbose)
  }

  # The read flags are passed to the native code as 0/1 integers
  readXY <- as.integer(as.logical(readXY))
  readBases <- as.integer(as.logical(readBases))
  readExpos <- as.integer(as.logical(readExpos))
  readType <- as.integer(as.logical(readType))
  readDirection <- as.integer(as.logical(readDirection))
  readIndices <- as.integer(as.logical(readIndices))

  # Argument 'stratifyBy':
  stratifyBy <- match.arg(stratifyBy)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?  An empty (but non-NULL) selection is rejected.
  if (!is.null(units) && length(units) == 0L) {
    stop("readCdfUnits(..., units=integer(0)) is not supported.")
  }

  cdf <- .Call("R_affx_get_cdf_units", filename, units,
               readXY, readBases, readExpos,
               readType, readDirection,
               readIndices,
               verbose, PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(cdf)) {
    stop("Failed to read CDF file: ", filename)
  }

  if (stratifyBy == "nothing") {
    return(cdf)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Stratify by PM, MM, or PM & MM
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  isPm <- readCdfIsPm(filename, units=units)
  asMatrix <- (stratifyBy == "pmmm")
  pmmmDimnames <- list(c("pm", "mm"), NULL)

  # Using .subset2()/.subset() instead of "[["()/"["() to avoid dispatching
  # overhead etc.
  for (uu in seq_along(cdf)) {
    groups <- .subset2(.subset2(cdf, uu), "groups")
    if (length(groups) == 0) next

    for (gg in seq_along(groups)) {
      group <- .subset2(groups, gg)
      pm <- .subset2(.subset2(isPm, uu), gg)
      nbrOfCells <- length(pm)
      cells <- seq_len(nbrOfCells)

      # Build the cell selector for this group.
      # Note: which() is about 60% slower than logical subsetting! /HB
      if (asMatrix) {
        pmIdxs <- cells[pm]
        mmIdxs <- cells[!pm]
        npm <- length(pmIdxs)
        if (npm != length(mmIdxs)) {
          # This is not expected to happen, but just in case.
          stop("Number of PM and MM probes differ in probeset #", uu,
               ": ", length(pmIdxs), " != ", length(mmIdxs))
        }
        # First row holds the PM cells, second row the MM cells
        selector <- matrix(c(pmIdxs, mmIdxs), nrow=2L, ncol=npm, byrow=TRUE)
        dim <- c(2, npm)
      } else if (stratifyBy == "pm") {
        selector <- cells[pm]
      } else {
        selector <- cells[!pm]
      }

      # Re-order/subset the cell elements according to PM/MM.
      for (kk in seq_along(group)) {
        field <- .subset2(group, kk)
        # Only fields with one value per cell can be stratified.  The group
        # 'direction' is documented to hold a single value for the whole
        # group; indexing it by cell would pad it with NAs.
        if (length(field) != nbrOfCells) next
        value <- .subset(field, selector)
        if (asMatrix) {
          dim(value) <- dim
          dimnames(value) <- pmmmDimnames
        }
        group[[kk]] <- value
      }

      groups[[gg]] <- group
    } # for (gg ...)

    cdf[[uu]]$groups <- groups
  } # for (uu ...)

  cdf
} # readCdfUnits()

############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2011-02-15
# o DOCUMENTATION: Clarified in help(readCdfUnits) that (x,y) coordinates
#   are zero-based and the _from (x,y) calculated_ cell indices are
#   one-based, regardless what the indices on file are.
# 2010-12-12
# o ROBUSTNESS: Replaces .Internal(matrix(...)) with matrix().
#   In the upcoming R 2.13.0 matrix() has less overhead.
# 2006-12-30
# o Now 'readDirection=TRUE' also return group directions.
# 2006-03-28 # o Unit indices are now one-based. /HB # o Renamed argument 'readCells' to 'readIndices'. /HB # 2006-03-24 # o Not returning 'pmmm' field anymore. A bit faster an smaller object. # o Speed improvement of the "stratifyBy" code. Instead of using which() # one can do the same oneself, which is 50% faster. In addition, I have # replaced "[[" and "[" with .subset2() and .subset(). # 2006-02-21 # o Added argument 'readCells' to speed up the calculation of cell indices # from (x,y), i.e. cell = y * ncol + x. # o Replaced argument 'splitPmMm' with 'stratifyBy'. This will speed up # things down the stream. # 2006-01-16 # o Added argument 'splitPmMm'. /HB # 2006-01-09 # o Note that ../man/readCdfUnits.R in generated from the Rdoc comments # above. See the R.oo package for details. Don't remove the *.Rex files! # o Created by HB. The purpose was to make it possible to read subsets # of units and not just all units at once. /HB ############################################################################ affxparser/R/readCdfUnitsWriteMap.R0000644000175200017520000001472714516003651020321 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction readCdfUnitsWriteMap # # @title "Generates an Affymetrix cell-index write map from a CDF file" # # @synopsis # # \description{ # @get "title". # # The purpose of this method is to provide a re-ordering of cell elements # such that cells in units (probesets) can be stored in contiguous blocks. # When reading cell elements unit by unit, minimal file re-position is # required resulting in a faster reading. # # Note: At the moment does this package not provide methods to # write/reorder CEL files. In the meanwhile, you have to write # and re-read using your own file format. That's not too hard using # \code{writeBin()} and @see "base::readBin". 
# } # # \arguments{ # \item{filename}{The pathname of the CDF file.} # \item{units}{An @integer @vector of unit indices specifying which units # to listed first. All other units are added in order at the end. # If @NULL, units are in order.} # \item{...}{Additional arguments passed to @see "readCdfUnits".} # \item{verbose}{Either a @logical, a @numeric, or a @see "R.utils::Verbose" # object specifying how much verbose/debug information is written to # standard output. If a Verbose object, how detailed the information is # is specified by the threshold level of the object. If a numeric, the # value is used to set the threshold of a new Verbose object. If @TRUE, # the threshold is set to -1 (minimal). If @FALSE, no output is written # (and neither is the \pkg{R.utils} package required).} # } # # \value{ # A @integer @vector which is a \emph{write} map. # } # # @author "HB" # # \examples{ # @include "../incl/readCdfUnitsWriteMap.Rex" # # @include "../incl/readCdfUnitsWriteMap.2.Rex" # } # # \seealso{ # To invert maps, see @see "invertMap". # @see "readCel" and @see "readCelUnits". 
# }
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
readCdfUnitsWriteMap <- function(filename, units=NULL, ..., verbose=FALSE) {
  # Builds a write map: the cell indices of the units listed in 'units'
  # come first, then those of all remaining units in file order, and last
  # the cells that readCdfCellIndices() does not return for any unit.

  # To please R CMD check
  Arguments <- enter <- exit <- NULL
  rm(list=c("Arguments", "enter", "exit"))

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  # Replace '~':s
  filename <- file.path(dirname(filename), basename(filename))

  # Argument 'units':
  if (!is.null(units)) {
    units <- as.numeric(units)
    if (any(is.na(units))) {
      stop("Argument 'units' contains NAs")
    }
    nok <- (units < 1)
    if (any(nok)) {
      nok <- paste(units[nok], collapse=", ")
      stop("Argument 'units' contains non-positive indices: ", nok)
    }
    nok <- duplicated(units)
    if (any(nok)) {
      nok <- paste(units[nok], collapse=", ")
      stop("Argument 'units' contains duplicated indices: ", nok)
    }
  }

  # Argument 'verbose':
  # R.utils is only needed when verbose output is requested.
  if (!identical(verbose, FALSE)) {
    requireNamespace("R.utils") || stop("Package not loaded: R.utils")
    Arguments <- R.utils::Arguments
    enter <- R.utils::enter
    exit <- R.utils::exit
    verbose <- Arguments$getVerbose(verbose)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read CDF header and process 'units' further
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  header <- readCdfHeader(filename)
  nbrOfCells <- header$ncols * header$nrows
  nbrOfUnits <- header$probesets

  nok <- (units > nbrOfUnits)
  if (any(nok)) {
    nok <- paste(units[nok], collapse=", ")
    stop("Argument 'units' contains indices out of range [1,", nbrOfUnits,
         "]: ", nok)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Read the cell indices of *all* units.  The list is indexed by unit index
  # below, so it must hold one element per unit in the file.  Reading only
  # 'units' and then indexing with 'units' permuted the requested units a
  # second time and never placed the remaining units at the end.
  verbose && enter(verbose, "Reading cell indices unit by unit from CDF file")
  indices <- readCdfCellIndices(filename, units=NULL, ..., verbose=FALSE)
  verbose && exit(verbose)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Return cell indices according to 'units'
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (!is.null(units)) {
    verbose && enter(verbose, "Reordering by units")

    # Was only a subset of units specified?
    if (length(units) != nbrOfUnits) {
      verbose && enter(verbose, "Adding missing unit indices")
      # Units not listed are appended in file order
      units <- c(units, setdiff(seq_len(nbrOfUnits), units))
      verbose && exit(verbose)
    }

    # Now, reorder the units (here 'indices') accordingly.
    indices <- indices[units]

    verbose && exit(verbose)
  }

  indices <- unlist(indices, use.names=FALSE)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Create index map
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  verbose && enter(verbose, "Adding missing cell indices")
  # Add non-probeset cells to the end.
  # (Note that readCdfCellIndices() do not read these guys.)
  indices <- c(indices, setdiff(seq_len(nbrOfCells), indices))
  verbose && exit(verbose)

  # Returns the write map
  indices
}

############################################################################
# HISTORY:
# 2006-09-07
# o BUG FIX: Tried to access fields 'cols' and 'rows' instead of 'ncols'
#   and 'nrows' in the CDF header.
# 2006-04-01
# o Now using readCdfCellIndices() to get cell indices.
# 2006-03-30
# o Redefined and renamed method to readCdfUnitsWriteMap().
# o Removed argument 'writeMap'.  This is possible because the new
#   invertMap() is so fast.
# 2006-03-28
# o Unit and cell indices are now one-based. /HB
# 2006-03-14
# o Updated code to make use of package R.utils only if it is available.
# o Added argument 'writeMap'.
# o Added more Rdoc comments and two examples with summaries etc.
# 2006-03-06
# o Added argument 'units' to read a subset of units or the units as, say,
#   they are order by chromsomal position.
# 2006-03-04
# o Removed all gc(). They slow down quite a bit.
# o Created.
############################################################################
# affxparser/R/readCel.R0000644000175200017520000001270214516003651015623 0ustar00biocbuildbiocbuild
readCel <- function(filename, indices = NULL, readHeader = TRUE, readXY = FALSE, readIntensities = TRUE, readStdvs = FALSE, readPixels = FALSE, readOutliers = TRUE, readMasked = TRUE, readMap = NULL, verbose = 0, .checkArgs = TRUE) {
  # Reads a CEL file through the native routine "R_affx_get_cel_file".
  # When a read map is given, the cell indices are mapped and sorted before
  # reading, and the values are put back in the requested order afterwards.

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Quick sort returning both the sorted values ($x) and the permutation
  # ($ix); replaces the former .Internal(qsort(x, TRUE)) call.
  quickSort <- function(values) {
    sort.int(values, index.return=TRUE, method="quick")
  } # quickSort()

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (.checkArgs) {
    # Argument 'filename':
    if (length(filename) != 1) {
      stop("Argument 'filename' should be a single file: ",
           paste(filename, collapse=", "))
    }
    # Expand '~' pathnames to full pathnames.
    filename <- file.path(dirname(filename), basename(filename))
    if (!file.exists(filename)) {
      stop("Cannot read CEL file. File not found: ", filename)
    }

    # Argument 'indices':
    nbrOfCells <- readCelHeader(filename)$total
    if (is.null(indices)) {
      # Read all cells
      indices <- 1:nbrOfCells
    } else {
      indices <- as.integer(indices)
      if (any(is.na(indices))) {
        stop("Argument 'indices' contains NAs.")
      }
      if (any(indices < 1) || any(indices > nbrOfCells)) {
        stop("Argument 'indices' is out of range [1,", nbrOfCells, "].")
      }
    }

    # Argument 'readMap':
    if (!is.null(readMap)) {
      readMap <- .assertMap(readMap, nbrOfCells)
    }

    # Argument 'verbose':
    if (length(verbose) != 1) {
      stop("Argument 'verbose' must be a single integer.")
    }
    if (!is.finite(as.integer(verbose))) {
      stop("Argument 'verbose' must be an integer: ", verbose)
    }
  } # if (.checkArgs)

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Remapping cell indices?  If so, also order them for optimal speed when
  # reading, i.e. minimizing jumping around in the file.
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Without a read map the indices are passed on as is, with no sorting.
  reorder <- !is.null(readMap)
  if (reorder) {
    indices <- readMap[indices]

    # quickSort() is about 10-15 times faster than using order()!
    sorted <- quickSort(indices)
    indices <- sorted$x
    # Sorting the permutation gives the permutation that undoes the sort
    o <- quickSort(sorted$ix)$ix
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Reading CEL file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # UNSUPPORTED CASE?  An empty (but non-NULL) selection is rejected.
  if (!is.null(indices) && length(indices) == 0L) {
    stop("readCel(..., indices=integer(0)) is not supported.")
  }

  # Note that 'readXY' is passed twice to the native routine.
  cel <- .Call("R_affx_get_cel_file", filename,
               readHeader,
               readIntensities, readXY, readXY, readPixels, readStdvs,
               readOutliers, readMasked,
               indices,
               as.integer(verbose), PACKAGE="affxparser")

  # Sanity check: the native code returns NULL on failure
  if (is.null(cel)) {
    stop("Failed to read CEL file: ", filename)
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Re-reordering the cell values
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (reorder) {
    # Every field except these three is put back in the requested order
    cellFields <- setdiff(names(cel), c("header", "outliers", "masked"))
    for (field in cellFields) {
      cel[[field]] <- cel[[field]][o]
    }
  }

  cel
} # readCel()

############################################################################
# HISTORY:
# 2012-05-22 [HB]
# o CRAN POLICY: readCel() and readCelUnits() are no longer calling
#   .Internal(qsort(...)).
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2007-12-01
# o Removed argument 'reorder' from readCel().
# 2007-01-04
# o Now 'readMap' is validate using internal .assertMap(), which also
#   coerces it to an integer vector.
# o BUG FIX: Using read maps for readCel() would give an error saying
#   the read map is invalid even when it is not.
# 2006-04-01
# o Added argument 'reorder'.
# 2006-03-29
# o Added argument '.checkArgs' so that when arguments have already been
#   checked, for instance by readCelUnits(), we pay less overhead when
#   calling this function.  file.exists() is a bit slow. /HB
# 2006-03-28
# o Unit and cell indices are now one-based.
#   /HB
############################################################################
# (archive member header) affxparser/R/readCelHeader.R0000644000175200017520000000241614516003651016735 0ustar00biocbuildbiocbuild

# Reads the header of a single CEL file via the native routine
# "R_affx_get_cel_file_header".  Stops if 'filename' is not a single
# pathname, if the file does not exist, or if the native code returns NULL.
readCelHeader <- function(filename) {
  # Exactly one pathname is accepted
  if (length(filename) != 1) {
    stop("Argument 'filename' should be a single file: ",
         paste(filename, collapse=", "));
  }

  # Expand '~' pathnames to full pathnames before testing for existence
  filename <- file.path(dirname(filename), basename(filename));
  if (!file.exists(filename)) {
    stop("Cannot read CEL file header. File not found: ", filename);
  }

  # Let the native code parse the header
  header <- .Call("R_affx_get_cel_file_header", filename, PACKAGE="affxparser");

  # A NULL result means the native code failed
  if (is.null(header)) {
    stop("Failed to read CEL file header: ", filename);
  }

  header;
} # readCelHeader()


############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
############################################################################
# (archive member header) affxparser/R/readCelIntensities.R0000644000175200017520000000610014516003651020035 0ustar00biocbuildbiocbuild

# Reads the intensities of one or several CEL files into a numeric matrix
# with one column per file (column names are the expanded pathnames) and
# one row per cell read.
#
# Arguments:
#  filenames: Pathnames of the CEL files; must not be empty.
#  indices:   Cell indices passed to readCel().  If NULL, the matrix gets
#             rows*cols rows according to the CEL headers.
#  ...:       Further arguments passed to readCel().
#  verbose:   A single value coercible to a finite integer; 'verbose - 1'
#             is passed to readCel().
#
# A warning is given if the chip types differ, and an error if the
# row/column dimensions of the files differ.
readCelIntensities <- function(filenames, indices = NULL, ..., verbose = 0){
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filenames':
  if (length(filenames) == 0)
    stop("Argument 'filenames' is empty.");

  # Expand '~' pathnames to full pathnames.
  filenames <- file.path(dirname(filenames), basename(filenames));
  notFound <- !file.exists(filenames);
  if (any(notFound)) {
    stop("Cannot read CEL files. Some files not found: ",
         paste(filenames[notFound], collapse=", "));
  }

  # Argument 'verbose':
  if (length(verbose) != 1) {
    stop("Argument 'verbose' must be a single integer.");
  }
  verboseLevel <- as.integer(verbose);
  if (!is.finite(verboseLevel)) {
    stop("Argument 'verbose' must be an integer: ", verbose);
  }
  verbose <- verboseLevel;
  beVerbose <- (verbose > 0);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Setup
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (beVerbose) {
    cat("Entering readCelIntensities()\n ... reading headers\n");
  }

  # One header per CEL file
  headers <- lapply(as.list(filenames), readCelHeader)

  # All chips should be of the same type ...
  chipTypes <- unique(sapply(headers, function(hdr) hdr$chiptype))
  if (length(chipTypes) != 1) {
    warning("The CEL files do not have the same chiptype.")
  }

  # ... and must have the same layout
  nrows <- unique(sapply(headers, function(hdr) hdr$rows))
  ncols <- unique(sapply(headers, function(hdr) hdr$cols))
  if (length(nrows) != 1 || length(ncols) != 1) {
    stop("The CEL files dimension do not match.");
  }

  nfiles <- length(filenames);

  if (beVerbose) {
    cat(" ...allocating memory for intensity matrix\n");
  }

  # Allocate the result: one row per cell to be read, one column per file
  nbrOfRows <- if (is.null(indices)) nrows * ncols else length(indices);
  intensities <- matrix(NA_real_, nrow = nbrOfRows, ncol = nfiles)
  colnames(intensities) <- filenames

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Reading intensities from all CEL files
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  for (kk in seq_len(nfiles)) {
    pathname <- filenames[kk];
    if (beVerbose)
      cat(" ... reading", pathname, "\n");
    cel <- readCel(filename = pathname,
                   indices = indices,
                   readIntensities = TRUE,
                   readHeader = FALSE,
                   readStdvs = FALSE, readPixels = FALSE,
                   readXY = FALSE, readOutliers = FALSE,
                   readMasked = FALSE,
                   ...,
                   verbose = (verbose - 1));
    intensities[, kk] <- cel$intensities
  } # for (kk in ...)

  intensities;
} # readCelIntensities()
# (archive member header) affxparser/R/readCelRectangle.R0000644000175200017520000000603414516003651017451 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction readCelRectangle
#
# @title "Reads a spatial subset of probe-level data from Affymetrix CEL files"
#
# @synopsis
#
# \description{
#  @get "title".
# }
#
# \arguments{
#   \item{filename}{The pathname of the CEL file.}
#   \item{xrange}{A @numeric @vector of length two giving the left
#     and right coordinates of the cells to be returned.}
#   \item{yrange}{A @numeric @vector of length two giving the top
#     and bottom coordinates of the cells to be returned.}
#   \item{...}{Additional arguments passed to @see "readCel".}
#   \item{asMatrix}{If @TRUE, the CEL data fields are returned as
#     matrices with element (1,1) corresponding to cell
#     (xrange[1],yrange[1]).}
# }
#
# \value{
#   A named @list CEL structure similar to what @see "readCel" returns.
#   In addition, if \code{asMatrix} is @TRUE, the CEL data fields
#   are returned as matrices, otherwise not.
# }
#
# @author "HB"
#
# @examples "../incl/readCelRectangle.Rex"
#
# \seealso{
#   The @see "readCel" method is used internally.
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
readCelRectangle <- function(filename, xrange=c(0,Inf), yrange=c(0,Inf), ..., asMatrix=TRUE) {
  # Restricts a coordinate to [0, upper]
  clamp <- function(value, upper) {
    max(min(value, upper), 0);
  }

  # Arguments 'xrange' and 'yrange' must both be (from, to) pairs
  if (length(xrange) != 2) {
    stop("Argument 'xrange' is not of length 2: ", length(xrange))
  }
  if (length(yrange) != 2) {
    stop("Argument 'yrange' is not of length 2: ", length(yrange))
  }

  # The chip layout is taken from the CEL header
  celHeader <- readCelHeader(filename);
  nbrOfRows <- celHeader$rows;
  nbrOfCols <- celHeader$cols;

  # Keep the requested rectangle within the chip (zero-based coordinates)
  xrange[1] <- clamp(xrange[1], nbrOfCols-1);
  xrange[2] <- clamp(xrange[2], nbrOfCols-1);
  yrange[1] <- clamp(yrange[1], nbrOfRows-1);
  yrange[2] <- clamp(yrange[2], nbrOfRows-1);

  # Zero-based offset of the left-most requested cell in each requested row
  ys <- yrange[1]:yrange[2];
  rowOffsets <- ys * nbrOfCols + xrange[1];

  # Column positions relative to the left edge of the rectangle
  xrange <- xrange - xrange[1];
  dxs <- xrange[1]:xrange[2];

  # One matrix row per x position and one matrix column per y position;
  # 'dxs' is recycled down the columns.
  cells <- matrix(rowOffsets, nrow=length(dxs), ncol=length(ys), byrow=TRUE);
  # Cell indices are one-based in R
  cells <- cells + dxs + 1;

  # Read CEL data
  cel <- readCel(filename, indices=cells, ...);

  # Rearrange each cell-value field into a matrix?
  if (asMatrix) {
    nbrOfXs <- nrow(cells);
    cellFields <- intersect(c("x", "y", "intensities", "stdvs", "pixels"),
                            names(cel));
    cel[cellFields] <- lapply(cel[cellFields], FUN=matrix,
                              ncol=nbrOfXs, byrow=TRUE);
  }

  cel;
} # readCelRectangle()


############################################################################
# HISTORY:
# 2014-10-24
#   ROBUSTNESS: Now readCelRectangle() gives an informative error message
#   if argument 'xrange' or 'yrange' is not of length two.
# 2006-03-28
# o Now cell indices are one-based.
# 2006-03-22
# o Added Rdoc comments.
# 2006-03-21 (Stockholm, Sveavagen)
# o Created.
############################################################################ affxparser/R/readCelUnits.R0000644000175200017520000005446714516003651016664 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction readCelUnits # # @title "Reads probe-level data ordered as units (probesets) from one or several Affymetrix CEL files" # # @synopsis # # \description{ # @get "title" by using the unit and group definitions in the # corresponding Affymetrix CDF file. # } # # \arguments{ # \item{filenames}{The filenames of the CEL files.} # \item{units}{An @integer @vector of unit indices specifying which # units to be read. If @NULL, all units are read.} # \item{stratifyBy}{Argument passed to low-level method # @see "affxparser::readCdfCellIndices".} # \item{cdf}{A @character filename of a CDF file, or a CDF @list # structure. If @NULL, the CDF file is searched for by # @see "findCdf" first starting from the current directory and # then from the directory where the first CEL file is.} # \item{...}{Arguments passed to low-level method # @see "affxparser::readCel", e.g. \code{readXY} and \code{readStdvs}.} # \item{addDimnames}{If @TRUE, dimension names are added to arrays, # otherwise not. The size of the returned CEL structure in bytes # increases by 30-40\% with dimension names.} # \item{dropArrayDim}{If @TRUE and only one array is read, the elements of # the group field do \emph{not} have an array dimension.} # \item{transforms}{A @list of exactly \code{length(filenames)} # @functions. If @NULL, no transformation is performed. # Intensities read are passed through the corresponding transform # function before being returned.} # \item{readMap}{A @vector remapping cell indices to file indices. # If @NULL, no mapping is used.} # \item{verbose}{Either a @logical, a @numeric, or a @see "R.utils::Verbose" # object specifying how much verbose/debug information is written to # standard output. 
If a Verbose object, how detailed the information is # is specified by the threshold level of the object. If a numeric, the # value is used to set the threshold of a new Verbose object. If @TRUE, # the threshold is set to -1 (minimal). If @FALSE, no output is written # (and neither is the \pkg{R.utils} package required). # } # } # # \value{ # A named @list with one element for each unit read. The names # corresponds to the names of the units read. # Each unit element is in # turn a @list structure with groups (aka blocks). # Each group contains requested fields, e.g. \code{intensities}, # \code{stdvs}, and \code{pixels}. # If more than one CEL file is read, an extra dimension is added # to each of the fields corresponding, which can be used to subset # by CEL file. # # Note that neither CEL headers nor information about outliers and # masked cells are returned. To access these, use @see "readCelHeader" # and @see "readCel". # } # # @author "HB" # # @examples "../incl/readCelUnits.Rex" # # \seealso{ # Internally, @see "readCelHeader", @see "readCdfUnits" and # @see "readCel" are used. # } # # \references{ # [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, # June 14, 2005. 
#   \url{http://www.affymetrix.com/support/developer/}
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
# Overview of the implementation below:
#  0. Locate the CDF file if needed (no 'cdf' given, or a CDF list with
#     (x,y) fields, which requires the chip layout).
#  1. Obtain the cell indices of all unit groups.
#  2. Optionally remap the indices via 'readMap', then sort them and keep
#     the inverse permutation 'o'.
#  3. Read every CEL file with readCel() and store each cell-value field
#     as a (cells x arrays) matrix in a local variable named as the field.
#  4. Cut these matrices into units and groups following the CDF structure.
readCelUnits <- function(filenames, units=NULL, stratifyBy=c("nothing", "pmmm", "pm", "mm"), cdf=NULL, ..., addDimnames=FALSE, dropArrayDim=TRUE, transforms=NULL, readMap=NULL, verbose=FALSE) {
  # To please R CMD check
  Arguments <- enter <- exit <- NULL;
  rm(list=c("Arguments", "enter", "exit"));

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  qsort <- function(x) {
##    o0 <- .Internal(qsort(x, TRUE));
##    o <- sort.int(x, index.return=TRUE, method="quick");
##    stopifnot(identical(o, o0));
    sort.int(x, index.return=TRUE, method="quick");
  } # qsort()

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filenames':
  filenames <- file.path(dirname(filenames), basename(filenames));
  missing <- filenames[!file.exists(filenames)];
  if (length(missing)) {
    stop("File(s) not found: ", paste(missing, collapse=", "));
  }

  # Argument 'units' and 'cdf':
  if (is.list(cdf) && !is.null(units)) {
    stop("Arguments 'units' must not be specified if argument 'cdf' is a CDF list structure.");
  }

  # Argument 'units':
  if (is.null(units)) {
  } else if (is.numeric(units)) {
    units <- as.integer(units);
    # Unit indices are one-based in R
    if (any(units < 1L))
      stop("Argument 'units' contains non-positive indices.");
  } else {
    stop("Argument 'units' must be numeric or NULL: ", class(units)[1]);
  }

  # Argument 'cdf':
  searchForCdf <- FALSE;
  if (is.null(cdf)) {
    searchForCdf <- TRUE;
  } else if (is.character(cdf)) {
    cdfFile <- file.path(dirname(cdf), basename(cdf));
    if (!file.exists(cdfFile))
      stop("File not found: ", cdfFile);
    # From here on 'cdf' is NULL and the CDF is identified by 'cdfFile'
    cdf <- NULL;
  } else if (is.list(cdf)) {
    aUnit <- cdf[[1L]];
    if (!is.list(aUnit))
      stop("Argument 'cdf' is of unknown format: First unit is not a list.");

    groups <- aUnit$groups;
    if (!is.list(groups))
      stop("Argument 'cdf' is of unknown format: Units Does not contain the list 'groups'.");

    # Do the units carry more elements than 'groups'?
    extractGroups <- (length(names(aUnit)) > 1L);

    # Check for group fields 'indices' or 'x' & 'y' in one of the groups.
    aGroup <- groups[[1]];

    extractFields <- TRUE;
    fields <- names(aGroup);
    if ("indices" %in% fields) {
      cdfType <- "indices";
      extractFields <- (length(fields) > 1L);
    } else if (all(c("x", "y") %in% fields)) {
      # The CDF is needed in order to know the (x,y) dimensions of the
      # chip so that one can calculate (x,y) -> cell index.
      cdfType <- "x";
      searchForCdf <- TRUE;
    } else {
      stop("Argument 'cdf' is of unknown format: The groups contains neither the fields 'indices' nor ('x' and 'y').");
    }
    aUnit <- groups <- aGroup <- NULL; # Not needed anymore
  } else {
    stop("Argument 'cdf' must be a filename, a CDF list structure or NULL: ", mode(cdf));
  }

  # Argument 'readMap':
  if (!is.null(readMap)) {
    # Cannot check map indices without knowing the array.  Is it worth
    # reading such details already here?
  }

  # Argument 'dropArrayDim':
  dropArrayDim <- as.logical(dropArrayDim);

  # Argument 'addDimnames':
  addDimnames <- as.logical(addDimnames);

  nbrOfArrays <- length(filenames);

  # Argument 'transforms':
  if (is.null(transforms)) {
    hasTransforms <- FALSE;
  } else if (is.list(transforms)) {
    if (length(transforms) != nbrOfArrays) {
      stop("Length of argument 'transforms' does not match the number of arrays: ", length(transforms), " != ", nbrOfArrays);
    }
    for (transform in transforms) {
      if (!is.function(transform))
        stop("Argument 'transforms' must be a list of functions.");
    }
    hasTransforms <- TRUE;
  } else {
    stop("Argument 'transforms' must be a list of functions or NULL.");
  }

  # Argument 'stratifyBy':
  stratifyBy <- match.arg(stratifyBy);

  # Argument 'verbose': (Utilized the Verbose class in R.utils if available)
  if (!identical(verbose, FALSE)) {
    requireNamespace("R.utils") || stop("Package not loaded: R.utils");
    Arguments <- R.utils::Arguments
    enter <- R.utils::enter
    exit <- R.utils::exit
    verbose <- Arguments$getVerbose(verbose);
  }
  # Verbose level passed on to readCel()
  cVerbose <- -(as.numeric(verbose) + 2);


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # 0. Search for CDF file?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (searchForCdf) {
    verbose && enter(verbose, "Searching for CDF file");

    verbose && enter(verbose, "Reading chip type from first CEL file");
    celHeader <- readCelHeader(filenames[1L]);
    chipType <- celHeader$chiptype;
    verbose && exit(verbose);

    verbose && enter(verbose, "Searching for chip type '", chipType, "'");
    cdfFile <- findCdf(chipType=chipType);
    if (length(cdfFile) == 0L) {
      # If not found, try also where the first CEL file is
      opwd <- getwd();
      on.exit(setwd(opwd));
      setwd(dirname(filenames[1L]));
      cdfFile <- findCdf(chipType=chipType);
      setwd(opwd);
    }
    verbose && exit(verbose);
    if (length(cdfFile) == 0L)
      stop("No CDF file for chip type found: ", chipType);

    verbose && exit(verbose);
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # 1. Read cell indices for units of interest from the CDF file?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (is.null(cdf)) {
#    verbose && enter(verbose, "Reading cell indices from CDF file");
    cdf <- readCdfCellIndices(cdfFile, units=units, stratifyBy=stratifyBy, verbose=FALSE);
#    verbose && exit(verbose);

    # Assume 'cdf' contains only "indices" fields.
    indices <- unlist(cdf, use.names=FALSE);
  } else {
    if (cdfType == "indices") {
      # Clean up CDF list structure from other elements than groups?
      if (extractGroups) {
#        verbose && enter(verbose, "Extracting groups...");
        cdf <- lapply(cdf, FUN=function(unit) list(groups=unit$groups));
#        verbose && exit(verbose);
      }

      # Clean up CDF list structure from other group fields than "indices"?
      if (extractFields) {
#        verbose && enter(verbose, "Extracting fields...");
        cdf <- applyCdfGroups(cdf, cdfGetFields, fields="indices");
#        verbose && exit(verbose);
      }
      indices <- unlist(cdf, use.names=FALSE);
    } else {
      verbose && enter(verbose, "Calculating cell indices from (x,y) positions");

      verbose && enter(verbose, "Reading chip layout from CDF file");
      cdfHeader <- readCdfHeader(cdfFile);
      ncol <- cdfHeader$cols;
      verbose && exit(verbose);

      # Clean up CDF list structure from other elements than groups
      cdf <- lapply(cdf, FUN=function(unit) list(groups=unit$groups));
      x <- unlist(applyCdfGroups(cdf, cdfGetFields, "x"), use.names=FALSE);
      y <- unlist(applyCdfGroups(cdf, cdfGetFields, "y"), use.names=FALSE);
      # Cell indices are one-based in R
      indices <- as.integer(y * ncol + x + 1);
      x <- y <- NULL; # Not needed anymore
      verbose && exit(verbose);
    }
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # 2. Remapping cell indices?
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (!is.null(readMap)) {
    verbose && enter(verbose, "Remapping cell indices");
    indices <- readMap[indices];
    verbose && exit(verbose);
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # 2b. Order cell indices for optimal speed when reading, i.e. minimal
  #     jumping around in the file.
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  reorder <- TRUE;  # Hardwired from now on.
  if (reorder) {
    verbose && enter(verbose, "Reordering cell indices to optimize speed");
    # qsort() is about 10-15 times faster than using order()!
    # WAS: o <- .Internal(qsort(indices, TRUE));  # From base::sort.int()
    o <- qsort(indices);
    indices <- o$x;
    # Sorting the permutation gives its inverse, used below to restore
    # the original cell order after reading.
    # WAS: o <- .Internal(qsort(o$ix, TRUE))$ix;  # From base::sort.int()
    o <- qsort(o$ix)$ix;
    verbose && exit(verbose);
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # 3. Read signals of the cells of interest from the CEL file(s)
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Comment: We assign elements of CEL list structure to local environment,
  # because calling cel[[field]][idxs,] multiple (=nbrOfUnits) times is very
  # slow whereas get(field) is much faster (about 4-6 times actually!)
  # /HB 2006-03-24

  nbrOfCells <- length(indices);
  nbrOfUnits <- length(cdf);

  # Because integer 'nbrOfCells*nbrOfArrays' may overflow to NA, we coerce
  # to double here.  See aroma.affymetrix thread 'Speeding up
  # RmaBackgroundCorrection' on 2014-02-27 for background/details.
  # FIXME: Ideally, this function should be rewritten to read signals and
  # group them into CEL units in chunks. /HB 2014-02-27
  nbrOfEntries <- as.double(nbrOfCells) * as.double(nbrOfArrays);
  stopifnot(is.finite(nbrOfEntries));

  verbose && enter(verbose, "Reading ", nbrOfUnits, "*", nbrOfCells/nbrOfUnits, "=", nbrOfCells, " cells from ", nbrOfArrays, " CEL files");

  # Cell-value elements
  cellValueFields <- c("x", "y", "intensities", "stdvs", "pixels");
  integerFields <- "pixels";
  doubleFields <- setdiff(cellValueFields, integerFields);

  # Local environment where to store the temporary variables
  env <- environment();

  for (kk in seq_len(nbrOfArrays)) {
    filename <- filenames[kk];

    verbose && enter(verbose, "Reading CEL data for array #", kk);
    celTmp <- readCel(filename, indices=indices, readHeader=FALSE, readOutliers=FALSE, readMasked=FALSE, ..., readMap=NULL, verbose=cVerbose, .checkArgs=FALSE);
    verbose && exit(verbose);

    if (kk == 1L) {
      verbose && enter(verbose, "Allocating return structure");
      # Allocate the return list structure
#      celTmp$header <- NULL;
      celFields <- names(celTmp);

      # Update list of special fields
      cellValueFields <- intersect(celFields, cellValueFields);
      doubleFields <- intersect(cellValueFields, doubleFields);
      integerFields <- intersect(cellValueFields, integerFields);

      # Allocate all field variables
      dim <- c(nbrOfCells, nbrOfArrays);
      value <- vector("double", length=nbrOfEntries);
      dim(value) <- dim;
      for (name in doubleFields)
        assign(name, value, envir=env, inherits=FALSE);
      value <- NULL; # Not needed anymore

      value <- vector("integer", length=nbrOfEntries);
      dim(value) <- dim;
      for (name in integerFields)
        assign(name, value, envir=env, inherits=FALSE);
      value <- NULL; # Not needed anymore
      verbose && exit(verbose);
    }

    for (name in cellValueFields) {
      # Extract field values and re-order them again
      value <- celTmp[[name]];
      if (is.null(value))
        next;

      # "Re-reorder" cells read
      if (reorder)
        value <- value[o];

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # Transform signals?
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (hasTransforms && name == "intensities") {
        verbose && enter(verbose, "Transform signals for array #", kk);
        value <- transforms[[kk]](value);
        verbose && exit(verbose);
      }

      # Store as column 'kk' of the local matrix variable named as the field
      eval(substitute(name[,kk] <- value, list(name=as.name(name))));
      value <- NULL; # Not needed anymore
    } # for (name ...)

    celTmp <- NULL; # Not needed anymore
  }
  verbose && exit(verbose);

  indices <- NULL; # Not needed anymore

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # 4. Structure CEL data in units and groups according to the CDF file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  verbose && enter(verbose, "Structuring data by units and groups");

  fields <- vector("list", length=length(cellValueFields));
  names(fields) <- cellValueFields;

  # Keep a copy for groups with empty fields
  emptyFields <- fields;

  # Add a dimension for the arrays, unless only one array is read
  # and the array dimension is not wanted.
  addArrayDim <- (nbrOfArrays >= 2L || !dropArrayDim);

  seqOfArrays <- list(seq_len(nbrOfArrays));
  # Number of cells consumed so far; updated via '<<-' inside the closures
  offset <- 0L;

  res <- lapply(cdf, FUN=function(u) {
    lapply(.subset2(u, "groups"), FUN=function(g) {
      # Same dimensions of all fields
      field <- .subset2(g, 1L);  # Faster than g[[1L]]
      ncells <- length(field);

      # Empty unit group?
      if (ncells == 0L)
        return(emptyFields);

      idxs <- offset + 1:ncells;
      offset <<- offset + ncells;

      # Get the target dimension
      dim <- dim(field);
      if (is.null(dim))
        dim <- ncells;

      if (addDimnames) {
        dimnames <- dimnames(field);
        if (is.null(dimnames))
          dimnames <- list(seq_len(dim));

        # Add an extra dimension for arrays?
        if (addArrayDim) {
          dim <- c(dim, nbrOfArrays);
          dimnames <- c(dimnames, seqOfArrays);
        }

        # Update all fields with dimensions
        setDim <- (length(dim) > 1L);
        for (name in cellValueFields) {
          # Faster to drop dimensions.
          values <- get(name, envir=env, inherits=FALSE)[idxs,,drop=TRUE];
          if (setDim) {
            dim(values) <- dim;
            dimnames(values) <- dimnames;
          } else {
            # 'dimnames' is a list; assigning the list itself would give
            # the names c("1:n", NA, ...), so use its first element.
            names(values) <- dimnames[[1L]];
          }
          fields[[name]] <- values;
          values <- NULL; # Not needed anymore
        }
      } else {
        # Add an extra dimension for arrays?
        if (addArrayDim)
          dim <- c(dim, nbrOfArrays);

        # Update all fields with dimensions
        setDim <- (length(dim) > 1L);
        for (name in cellValueFields) {
          # Faster to drop dimensions.
          values <- get(name, envir=env, inherits=FALSE)[idxs,,drop=TRUE];
          if (setDim)
            dim(values) <- dim;
          fields[[name]] <- values;
          values <- NULL; # Not needed anymore
        }
      } # if (addDimnames)

      fields;
    }) # lapply(.subset2(u, "groups"), ...);
  }) # lapply(cdf, ...)

  verbose && exit(verbose);

  res;
}


############################################################################
# HISTORY:
# 2014-02-27 [HB]
# o ROBUSTNESS: Using integer constants (e.g. 1L) where applicable.
# o ROBUSTNESS: Using explicitly named arguments in more places.
# 2012-05-22 [HB]
# o CRAN POLICY: readCel() and readCelUnits() are no longer calling
#   .Internal(qsort(...)).
# 2007-12-01 [HB]
# o Removed argument 'reorder' from readCelUnits(). Reordering is now always
#   done.
# 2007-06-28 [HB]
# o Removed the name of the second argument in a substitute() call.
# 2007-02-01 [KS]
# o Now readCelUnits() can handle unit groups for which there are no probes,
#   e.g. when stratifying on PM in a unit containing only MMs.
# 2006-10-04 [HB] # o Made readCelUnits() a bit more clever if a 'cdf' structure with only # cell indices is passed. Then all fields are just indices and one can # call unlist immediately. This speeds things up a bit. # 2006-05-12 [HB] # o Rearranged order of arguments such that the most often used/most user- # friendly arguments come first. This was done as a first step after # our developers meeting yesterday. # 2006-04-18 [HB] # o BUG FIX: When argument 'cdf' was a CDF list structure with elements # 'type' or 'direction', readCelUnits() would not read the correct cells # because the values of 'type' and 'direction' would be included in the # extracted list of cell indices. # 2006-04-15 [HB] # o BUG FIX: Passed '...' to both readCdfCellIndices() and readCel(), but # should only be passed to the latter. # 2006-04-01 [HB] # o Using readCdfCellIndices() instead of readCdfUnits(). Faster! # o Added argument 'reorder'. If TRUE, all cells are read in order to # minimize the jumping around in the file. This speeds things up a lot! # I tried this last week, but for some reason I did not see a difference. # 2006-03-29 [HB] # o Renamed argument 'map' to 'readMap'. # 2006-03-28 [HB] # o Unit and cell indices are now one-based. # o Renamed argument 'readCells' to 'readIndices' and same with the name of # the returned group field. # 2006-03-26 [HB] # o Now only "x", "y", "intensities", "pixels", and "stdvs" values are # returned. # 2006-03-24 [HB] # o Made the creation of the final CEL structure according to the CDF much # faster. Now it is about 4-6 times faster utilizing get(field) instead # of cel[[field]]. # o Tried to reorder cell indices in order to minimize jumping around in the # file, but there seems to be no speed up at all doing this. Strange! # 2006-03-14 # o Updated code to make use of package R.utils only if it is available. # 2006-03-08 # o Removed the usage of Arguments of R.utils. This is because we might # move this function to the affxparser package. 
Still to be removed is # the use of the Verbose class. # 2006-03-04 # o Added argument 'map'. # o Removed all gc(). They slow down quite a bit. # 2006-02-27 [HB] # o BUG FIX: It was only stratifyBy="pmmm" that worked if more than one # array was read. # 2006-02-23 [HB] # o The restructuring code is now more generic, e.g. it does not require # the 'stratifyBy' argument and can append multiple arrays of any # dimensions. # o Now the CDF file is search for where the CEL files lives too. # 2006-02-22 [HB] # o First test where argument 'cdf' is a CDF structure. Fixed some bugs, # but it works now. # o Simple redundancy test: The new code and the updated affxparser package # works with the new aroma.affymetrix/rsp/ GUI. # o Now argument 'cdf' is checked to contain either 'cells' or 'x' & 'y' # group fields. If 'x' and 'y', the cell indices are calculated from # (x,y) and the chip layout obtained from the header of CDF file, which # has been searched for. # 2006-02-21 [HB] # o TO DO: Re implement all of this in a C function to speed things up # further; it is better to put values in the right position from the # beginning. # o Added arguments 'transforms' to be able to transform all probe signals # at once. This improves the speed further. # o Removed annotation of PM/MM dimension when 'stratifyBy="pmmm", because # the resulting object increases ~33% in size. # o Improved speed for restructuring cell data about 20-25%. # o Now it is possible to read multiple CEL files. # o Making use of new 'readCells' in readCdfUnits(), which is much faster. # o Replaced argument 'splitPmMm' with 'stratifyBy'. This speeds up if # reading PM (or MM) only. # o BUG FIX: 'splitPmMm=TRUE' allocated PMs and MMs incorrectly. The reason # was that the indices are already in the correct PM/MM order from the # splitPmMm in readCdfUnits() from which the (x,y) to cell indices are # calculated. # o Updated to make use of the latest affxparser. 
# 2006-01-24 [HB]
# o BUG FIX: Made some modifications a few days ago that introduced missing
#   variables etc.
# 2006-01-21 [HB]
# o Added Rdoc comments.
# 2006-01-16 [HB]
# o Created by HB.
############################################################################
# (archive member header) affxparser/R/readChp.R0000644000175200017520000000073714516003651015637 0ustar00biocbuildbiocbuild

# Reads a CHP file via the native routine "R_affx_get_chp_file".
#
# Arguments:
#  filename:  A single pathname.  A leading '~' is expanded.
#  withQuant: Passed as-is to the native routine.
#
# Value:
#  Whatever the native code returns.  Stops if 'filename' is not a single
#  existing file, or if the native code returns NULL.
readChp <- function(filename, withQuant=TRUE) {
  # Argument 'filename': (validated the same way as in readCelHeader())
  if (length(filename) != 1) {
    stop("Argument 'filename' should be a single file: ", paste(filename, collapse=", "));
  }
  # Expand '~' pathnames to full pathnames.
  filename <- file.path(dirname(filename), basename(filename));
  if (!file.exists(filename)) {
    stop("Cannot read CHP file. File not found: ", filename);
  }

  res <- .Call("R_affx_get_chp_file", filename, withQuant, PACKAGE="affxparser");

  # Sanity check
  if (is.null(res)) {
    stop("Failed to read CHP file: ", filename);
  }

  res;
} # readChp()


############################################################################
# HISTORY:
# 2011-11-18
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
############################################################################
# (archive member header) affxparser/R/readClf.R0000644000175200017520000000227714516003651015632 0ustar00biocbuildbiocbuild

# Reads only the header part of a CLF file and returns it as a list.
readClfHeader <- function(file) {
  as.list(readClfEnv(file, readBody=FALSE));
}

# Reads a complete CLF file and returns it as a list.
readClf <- function(file) {
  as.list(readClfEnv(file, readBody=TRUE));
}

# Reads a CLF file via the native routine "R_affx_get_clf_file".
#
# Arguments:
#  file:     Pathname of the CLF file.  A leading '~' is expanded.
#  readBody: Passed as-is to the native routine.
#
# Value:
#  The object returned by the native code.  Stops if the file does not
#  exist or if the native code returns NULL.
readClfEnv <- function(file, readBody=TRUE) {
  ## FIXME: this is an exception in more recent TsvFile.cpp
  # Argument 'file':
  if (!file.exists(file)) {
    stop("Cannot read CLF file. File not found: ", file);
  }
  # file.exists() accepts '~' pathnames, so expand them before handing
  # the pathname over to the native code.
  file <- file.path(dirname(file), basename(file));

  # Empty environment handed to the native code
  env <- new.env(parent=emptyenv());
  res <- .Call("R_affx_get_clf_file", file, readBody, env, PACKAGE="affxparser");

  # Sanity check
  if (is.null(res)) {
    stop("Failed to read CLF file: ", file);
  }

  res;
} # readClfEnv()


############################################################################
# HISTORY:
# 2012-06-14 [HB]
# o Extracted all CLF functions to readClf.R.
# o Harmonized the error messages.
# 2011-11-18 [HB]
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2008-03-24 [JB]
# o ROBUSTNESS: Asserts that argument 'indices' to readClfEnv() and
#   readPgfEnv() is ordered.
# 2008-03-22 [JB]
# o Added read(Clf|Pgf)Env() functions.
# 2008-03-04 [MM]
# o Added CLF and PGF parsers by Martin Morgan (MM).
############################################################################
# (archive member header) affxparser/R/readPgf.R0000644000175200017520000001007014516003651015630 0ustar00biocbuildbiocbuild

# Reads only the header part of a PGF file and returns it as a list.
readPgfHeader <- function(file) {
  as.list(readPgfEnv(file, readBody=FALSE));
}

# Reads a PGF file, optionally restricted to the probesets in 'indices',
# and returns it as a list.
readPgf <- function(file, indices=NULL) {
  # UNSUPPORTED CASE
  ## FIXME: When we have confirmed that no dependencies assumes
  ## (!is.null(indices) && length(indices) == 0) to read
  ## all probesets, then we can remove this. /HB 2014-11-22
  if (!is.null(indices) && length(indices) == 0L) {
    stop("readPgf(..., indices=integer(0)) is not supported.")
  }

  as.list(readPgfEnv(file, readBody=TRUE, indices=indices));
}

# Reads a PGF file via the native routine "R_affx_get_pgf_file".
#
# Arguments:
#  file:     Pathname of the PGF file.  A leading '~' is expanded.
#  readBody: Passed as-is to the native routine.
#  indices:  Optional probeset indices; must be positive, sorted and free
#            of NAs.  If the header reports the number of probesets, they
#            are also checked against it.  If NULL, no subset is passed.
#
# Value:
#  The object returned by the native code, with the optional header fields
#  'num-cols', 'num-rows', 'probesets' and 'datalines' coerced to integers.
#  Stops if the file does not exist, if 'indices' is invalid or empty, or
#  if the native code returns NULL.
readPgfEnv <- function(file, readBody=TRUE, indices=NULL) {
  ## Local functions
  # Coerces the optional numeric header fields to integers
  coercePgfHeader <- function(res, ...) {
    header <- res$header
    if (!is.list(header)) return(res)
    ## Optional fields to coerce to integers
    fields <- c("num-cols", "num-rows", "probesets", "datalines")
    fields <- intersect(fields, names(header)) # optional!
    for (field in fields) {
      value <- header[[field]]
      value <- as.integer(value)
      header[[field]] <- value
    }
    res$header <- header
    res
  } # coercePgfHeader()

  ## FIXME: this is an exception in more recent TsvFile.cpp
  # Argument 'file':
  if (!file.exists(file)) {
    stop("Cannot read PGF file. File not found: ", file);
  }
  # file.exists() accepts '~' pathnames, so expand them before handing
  # the pathname over to the native code.
  file <- file.path(dirname(file), basename(file));

  # Argument 'indices':
  if (!is.null(indices)) {
    indices <- as.integer(indices)
    # NAs would otherwise surface as an uninformative 'if (NA)' error below
    if (any(is.na(indices))) {
      stop("Argument 'indices' contains NAs.")
    }
    if (any(indices <= 0L)) {
      stop("Argument 'indices' contains a non-positive element")
    }
    if (!all(sort(indices) == indices)) {
      stop("Argument 'indices' must be sorted.");
    }
  }

  # UNSUPPORTED CASE
  ## FIXME: When we have confirmed that no dependencies assumes
  ## (!is.null(indices) && length(indices) == 0) to read
  ## all probesets, then we can remove this. /HB 2014-11-22
  if (!is.null(indices) && length(indices) == 0L) {
    stop("readPgf(..., indices=integer(0)) is not supported.")
  }

  ## Read all of PGF file?
  if (is.null(indices)) {
    env <- new.env(parent=emptyenv());
    res <- .Call("R_affx_get_pgf_file", file, readBody, env, NULL, PACKAGE="affxparser");
    if (is.null(res)) stop("Failed to read PGF file: ", file)
    res <- coercePgfHeader(res)
  } else {
    ## Read file header
    env <- new.env(parent=emptyenv())
    res <- .Call("R_affx_get_pgf_file", file, FALSE, env, NULL, PACKAGE="affxparser")
    if (is.null(res)) stop("Failed to read PGF file: ", file)
    res <- coercePgfHeader(res)

    # Validate indices?
    nbrOfUnits <- res$header$probesets
    if (is.numeric(nbrOfUnits)) {
      if (any(indices > nbrOfUnits)) {
        stop(sprintf("Argument 'indices' is out of range [1,%d]", nbrOfUnits))
      }
    }

    res <- .Call("R_affx_get_pgf_file", file, readBody, env, indices, PACKAGE="affxparser")
    # Sanity check, as for the other calls to the native code
    if (is.null(res)) stop("Failed to read PGF file: ", file)
    res <- coercePgfHeader(res)
  }

  res;
} # readPgfEnv()


# Maps probeset indices to the indices of their probes.
#
# Arguments:
#  pgf:         A list with elements 'probesetStartAtom', 'atomId',
#               'atomStartProbe' and 'probeId'.
#  probesetIdx: Indices into pgf[["probesetStartAtom"]].
#
# Value:
#  A list with one element per probeset, each holding the sequence of probe
#  indices from the first probe of the probeset to the last probe before
#  the next probeset starts (or to the last probe overall).
.pgfProbeIndexFromProbesetIndex <- function(pgf, probesetIdx) {
  ## probeset -> atom
  astarts <- pgf[["probesetStartAtom"]][probesetIdx];
  aends <- pgf[["probesetStartAtom"]][probesetIdx+1];
  # The last probeset has no successor; use one past the last atom
  aends[is.na(aends)] <- length(pgf[["atomId"]]) + 1;
  ## atom -> probe
  pstarts <- pgf[["atomStartProbe"]][astarts];
  pends <- pgf[["atomStartProbe"]][aends] - 1;
  # Looking up one past the last atom gives NA; use the last probe
  pends[is.na(pends)] <- length(pgf[["probeId"]]);

  mapply(":", pstarts, pends, SIMPLIFY=FALSE);
} # .pgfProbeIndexFromProbesetIndex()


############################################################################
# HISTORY:
# 2015-04-15 [HB]
# o ROBUSTNESS: Now readPgfEnv()/readPgf() validated 'indices', iff possible.
# o Now readPgfEnv()/readPgf() coerces some header fields to integers.
# 2012-06-14 [HB]
# o readPgfEnv(..., indices=NULL) no longer gives a warning.
# o Moved all CLF functions to readClf.R.
# o Harmonized the error messages.
# 2011-11-18 [HB]
# o ROBUSTNESS: Added sanity check that the native code did not return NULL.
# 2008-03-24 [JB]
# o ROBUSTNESS: Asserts that argument 'indices' to readClfEnv() and
#   readPgfEnv() is ordered.
# 2008-03-22 [JB]
# o Added read(Clf|Pgf)Env() functions.
# 2008-03-04 [MM]
# o Added CLF and PGF parsers by Martin Morgan (MM).
############################################################################
# affxparser/R/testWriteAndReadEmptyCdf.R0000644000175200017520000000132414516003651021127 0ustar00biocbuildbiocbuild
## out <- system('R -e "affxparser:::.testWriteAndReadEmptyCdf()"', intern=TRUE)
## res <- any(regexpr("COMPLETE", out) != -1);

# Locates the script 'testscripts/writeAndReadEmptyCdf.R' installed with
# the package, prints its pathname, sources it with echo=TRUE and finally
# prints "COMPLETE", which the caller sketched above greps for.
.testWriteAndReadEmptyCdf <- function() {
  script <- system.file("testscripts/writeAndReadEmptyCdf.R",
                        package="affxparser")
  cat("Pathname: ", script, "\n", sep="")
  source(script, echo=TRUE)
  cat("COMPLETE\n")
} # .testWriteAndReadEmptyCdf()


############################################################################
# HISTORY:
# 2012-05-18
# o Added because of the OSX build bug, cf.
#   https://groups.google.com/d/topic/aroma-affymetrix/lEfDanThLEA/discussion
# o Created.
############################################################################
# affxparser/R/testWriteAndReadEmptyCel.R0000644000175200017520000000117014516003651021135 0ustar00biocbuildbiocbuild
## out <- system('R -e "affxparser:::.testWriteAndReadEmptyCel()"', intern=TRUE)
## res <- any(regexpr("COMPLETE", out) != -1);

# Locates the script 'testscripts/writeAndReadEmptyCel.R' installed with
# the package, prints its pathname, sources it with echo=TRUE and finally
# prints "COMPLETE", which the caller sketched above greps for.
.testWriteAndReadEmptyCel <- function() {
  script <- system.file("testscripts/writeAndReadEmptyCel.R",
                        package="affxparser")
  cat("Pathname: ", script, "\n", sep="")
  source(script, echo=TRUE)
  cat("COMPLETE\n")
} # .testWriteAndReadEmptyCel()


############################################################################
# HISTORY:
# 2012-09-26
# o Created from testWriteAndReadEmptyCdf.R.
############################################################################
# affxparser/R/tpmap2bpmap.R0000644000175200017520000000023114516003651016501 0ustar00biocbuildbiocbuild

# Calls the native routine 'R_affx_write_bpmap_file' with the BPMAP
# pathname, the TPMAP pathname and the verbose level (coerced to integer),
# in that order.  The value of the native call is returned invisibly.
tpmap2bpmap <- function(tpmapname, bpmapname, verbose = 0){
  invisible(.Call("R_affx_write_bpmap_file", bpmapname, tpmapname,
                  as.integer(verbose)))
}
# affxparser/R/traverseCcg.R0000644000175200017520000001330114516003651016530 0ustar00biocbuildbiocbuild

# Returns the first element of dataHeader$parents whose 'dataTypeId' is
# identical to argument 'dataTypeId', or NULL if there is none.
.findCcgParent <- function(dataHeader, dataTypeId, ...) {
  for (parent in dataHeader$parents) {
    if (identical(parent$dataTypeId, dataTypeId))
      return(parent);
  }
  NULL;
} # .findCcgParent()


# Returns header$version, or header$fileHeader$version if the former is
# NULL.  Throws an error if neither exists.
.getCelHeaderVersion <- function(header, ...) {
  version <- header$version;
  if (is.null(version)) {
    version <- header$fileHeader$version;
  }
  if (is.null(version)) {
    stop("Cannot identify header version. Argument 'header' has an unknown format: ", class(header)[1]);
  }
  version;
} # .getCelHeaderVersion()


# Get the DatHeader from the CCG CEL header
#
# Supports header versions 1 (Command Console Generic), 3 and 4; any
# other version is an error.
.getCelDatHeader <- function(header, ...) {
  version <- .getCelHeaderVersion(header);
  if (version == 1) {
    # Command Console Generic (Calvin) format
    dataHeader <- header$dataHeader;
    parent <- .findCcgParent(dataHeader,
                     dataTypeId="affymetrix-calvin-scan-acquisition");
    datHeader <- parent$parameters[["affymetrix-dat-header"]];
  } else if (version == 3) {
    datHeader <- .unwrapCelHeaderV3String(header)$datHeader;
    datHeader <- .wrapDatHeader(datHeader);
  } else if (version == 4) {
    datHeader <- .unwrapCelHeaderV4(header)$header$DatHeader;
    datHeader <- .wrapDatHeader(datHeader);
  } else {
    stop("Cannot extract DAT header from CEL header. Unknown CEL header version: ", version);
  }

  datHeader;
} # .getCelDatHeader()


# Extract a CEL header of v3 from the CCG CEL header
#
# For a version 1 header, a single string of "key=value\n" entries is
# built from the data-header parameters.  For a version 3 header the
# argument is returned as is, and for a version 4 header its 'header'
# element is returned.  Any other version is an error.
.getCelHeaderV3 <- function(header, ...) {
  version <- .getCelHeaderVersion(header);
  if (version == 1) {
    # Command Console Generic (Calvin) format
    dataHeader <- header$dataHeader;
    params <- dataHeader$parameters;

    # Algorithm parameters
    pattern <- "^affymetrix-algorithm-param-";
    idxs <- grep(pattern, names(params));
    aParams <- params[idxs];
    names(aParams) <- gsub(pattern, "", names(aParams));

    hdr <- NULL;
    rows <- as.integer(params[["affymetrix-cel-rows"]][1]);
    cols <- as.integer(params[["affymetrix-cel-cols"]][1]);
    hdr <- c(hdr, sprintf("Cols=%d\nRows=%d\n", cols, rows));
    hdr <- c(hdr, sprintf("TotalX=%d\nTotalY=%d\n", cols, rows));
    # Constant entry; written as a literal since a format string without
    # conversion specifications must not be given extra arguments.
    hdr <- c(hdr, "OffsetX=0\nOffsetY=0\n");
    for (ff in c("UL", "UR", "LR", "LL")) {
      xkey <- sprintf("Grid%sX", ff);
      ykey <- sprintf("Grid%sY", ff);
      x <- as.integer(aParams[[xkey]][1])
      y <- as.integer(aParams[[ykey]][1])
      hdr <- c(hdr, sprintf("GridCorner%s=%d %d\n", ff, x, y));
    }
    # The two keys below are deliberately spelled differently; they are
    # emitted exactly as before.
    hdr <- c(hdr, sprintf("Axis-invertX=%d\nAxisInvertY=%d\n", 0, 0));
    hdr <- c(hdr, sprintf("swapXY=%d\n", 0));

    parent <- .findCcgParent(dataHeader,
                     dataTypeId="affymetrix-calvin-scan-acquisition");

    # Infer DAT header
    datHeader <- parent$parameters[["affymetrix-dat-header"]];
    if (is.null(datHeader)) {
      value <- parent$parameters[["affymetrix-partial-dat-header"]];
      # Guard against a missing partial DAT header, in which case the
      # informative error below is thrown.
      if (!is.null(value)) {
        pos <- regexpr(":CLS=", value);
        if (pos != -1) {
          value <- substring(value, pos+1);
          datHeader <- sprintf("[%d..%d] %s:%s", 0, 65535, "dummy", value);
        }
      }
    }
    if (is.null(datHeader)) {
      stop("Failed to locate a valid DAT header in the AGCC file header.");
    }

    hdr <- c(hdr, sprintf("DatHeader=%s\n", datHeader));
    hdr <- c(hdr, sprintf("Algorithm=%s\n",
                          params[["affymetrix-algorithm-name"]]));

    # Drop the grid parameters, which were already written above.  A
    # logical mask is used because negative indexing with an empty index
    # vector would drop *all* parameters.
    aParams <- aParams[!grepl("^Grid", names(aParams))];
    aParams <- sapply(aParams, FUN=function(x) x[1]);
    aParams <- paste(names(aParams), aParams, sep=":");
    aParams <- paste(aParams, collapse=";");
    hdr <- c(hdr, sprintf("AlgorithmParameters=%s\n", aParams));

    hdr <- paste(hdr, collapse="");
    headerV3 <- hdr;
  } else if (version == 3) {
    # Nothing to do.
    headerV3 <- header;
  } else if (version == 4) {
    # To do: Create a v3 header from scratch (for consistency).
    headerV3 <- header$header;
  } else {
    stop("Cannot extract CEL header of v3 from CEL header. Unknown CEL header version: ", version);
  }

  headerV3;
} # .getCelHeaderV3()


# Builds a CEL v4 header from a CEL header of version 1 or 4.
# A version 3 header is not supported and gives an error, as does any
# unknown version.
.getCelHeaderV4 <- function(header, ...) {
  version <- .getCelHeaderVersion(header);
  if (version == 1) {
    # Calvin CEL header?
    if (is.null(header$fileHeader)) {
      # Re-read the CEL CCG v1 header
      headerV4 <- header;
      header <- readCcgHeader(headerV4$filename);
    } else {
      # Re-read the CEL v4 header
      headerV4 <- readCelHeader(header$filename);
    }

    # Append CEL v3 header
    headerV4$header <- .getCelHeaderV3(header);
    headerV4 <- .unwrapCelHeaderV4(headerV4);
    headerV4 <- .wrapCelHeaderV4(headerV4);
  } else if (version == 3) {
    stop("Cannot get CEL header of v4 from CEL header of v3. Non-implemented feature.");
  } else if (version == 4) {
    headerV4 <- .wrapCelHeaderV4(.unwrapCelHeaderV4(header));
  } else {
    stop("Cannot extract CEL header of v4 from CEL header. Unknown CEL header version: ", version);
  }

  headerV4;
} # .getCelHeaderV4()


############################################################################
# HISTORY:
# 2015-04-15
# o BUG FIX: .getCelHeaderV4() on a CCG/v1 header could give "Error in
#   sprintf("GridCorner%s=%d %d\n" ... invalid format '%d' ...)".
# 2012-05-18
# o Now using stop() instead of throw().
# 2007-10-12
# o Now .getCelHeaderV3() tries to infer the DAT header from parent
#   parameters 'affymetrix-partial-dat-header' if 'affymetrix-dat-header'
#   is not available. If neither is found, an informative error is thrown.
# 2007-08-16
# o Added .getCelHeaderV4(). Verified to work with CEL v1 & v4 headers.
# o Added .getCelHeaderV3(). Verified to work with CEL v1, v3 & v4 headers.
# o Added .getCelDatHeader(). Verified to work with CEL v1 & v4 headers.
# o Created.
############################################################################
# affxparser/R/updateCel.R0000644000175200017520000003230414516003651016172 0ustar00biocbuildbiocbuild
#########################################################################/**
# @RdocFunction updateCel
#
# @title "Updates a CEL file"
#
# @synopsis
#
# \description{
#   @get "title".
# }
#
# \arguments{
#   \item{filename}{The filename of the CEL file.}
#   \item{indices}{A @numeric @vector of cell (probe) indices specifying
#     which cells to update.  If @NULL, all indices are considered.}
#   \item{intensities}{A @numeric @vector of intensity values to be stored.
#     Alternatively, it can also be a named @data.frame or @matrix (or @list)
#     where the named columns (elements) are the fields to be updated.}
#   \item{stdvs}{An optional @numeric @vector.}
#   \item{pixels}{An optional @numeric @vector.}
#   \item{writeMap}{An optional write map.}
#   \item{...}{Not used.}
#   \item{verbose}{An @integer specifying how much verbose details are
#     outputted.}
# }
#
# \value{
#   Returns (invisibly) the pathname of the file updated.
# }
#
# \details{
#   Currently only binary (v4) CEL files are supported.
#   The current version of the method does not make use of the Fusion SDK,
#   but its own code to navigate and update the CEL file.
# }
#
# @examples "../incl/updateCel.Rex"
#
# @author "HB"
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
updateCel <- function(filename, indices=NULL, intensities=NULL, stdvs=NULL, pixels=NULL, writeMap=NULL, ..., verbose=0) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'filename':
  if (!file.exists(filename)) {
    stop("Cannot update CEL file. File not found: ", filename);
  }

  header <- readCelHeader(filename);
  version <- header$version;
  if (!version %in% 4) {
    stop("Updating CEL v", version, " files is not supported: ", filename);
  }

  nbrOfCells <- header$total;

  # Argument 'indices':
  if (is.null(indices)) {
    nbrOfIndices <- nbrOfCells;
  } else {
    # Missing values would otherwise give an obscure error in the range
    # test below.
    if (any(is.na(indices))) {
      stop("Argument 'indices' contains missing values.");
    }
    # A CEL file has one-based indices
    # (range() of an empty vector gives warnings, so skip it then)
    if (length(indices) > 0) {
      r <- range(indices);
      if (r[1] < 1 || r[2] > nbrOfCells) {
        stop("Argument 'indices' is out of range [1,", nbrOfCells, "]: ",
             "[", r[1], ",", r[2], "]");
      }
    }
    nbrOfIndices <- length(indices);
  }

  # Argument 'intensities':
  if (is.matrix(intensities)) {
    intensities <- as.data.frame(intensities);
  }

  if (is.list(intensities) || is.data.frame(intensities)) {
    if (is.list(intensities)) {
      fields <- names(intensities);
    } else {
      fields <- colnames(intensities);
    }

    if (is.null(stdvs) && ("stdvs" %in% fields)) {
      stdvs <- intensities[["stdvs"]];
    }

    if (is.null(pixels) && ("pixels" %in% fields)) {
      pixels <- intensities[["pixels"]];
    }

    if ("intensities" %in% fields) {
      intensities <- intensities[["intensities"]];
    } else {
      # No intensities among the fields; do not try to coerce the
      # list/data.frame itself to doubles below.
      intensities <- NULL;
    }
  }

  # Argument 'intensities':
  if (!is.null(intensities)) {
    if (!is.double(intensities))
      intensities <- as.double(intensities);
    if (length(intensities) != nbrOfIndices) {
      stop("Number of 'intensities' values does not match the number of cell indices: ", length(intensities), " != ", nbrOfIndices);
    }
  }

  # Argument 'stdvs':
  if (!is.null(stdvs)) {
    if (!is.double(stdvs))
      stdvs <- as.double(stdvs);
    if (length(stdvs) != nbrOfIndices) {
      stop("Number of 'stdvs' values does not match the number of cell indices: ", length(stdvs), " != ", nbrOfIndices);
    }
  }

  # Argument 'pixels':
  if (!is.null(pixels)) {
    if (!is.integer(pixels))
      pixels <- as.integer(pixels);
    if (length(pixels) != nbrOfIndices) {
      stop("Number of 'pixels' values does not match the number of cell indices: ", length(pixels), " != ", nbrOfIndices);
    }
  }

  # Argument 'writeMap':
  if (!is.null(writeMap)) {
    writeMap <- .assertMap(writeMap, nbrOfCells);
  }

  # Argument 'verbose':
  if (length(verbose) != 1)
    stop("Argument 'verbose' must be a single integer.");
  verbose <- as.integer(verbose);
  if (!is.finite(verbose))
    stop("Argument 'verbose' must be an integer: ", verbose);

  # Nothing to do?
  if (nbrOfIndices == 0) {
    return(invisible(filename));
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Reorder data such that it is written in optimal order
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  reorder <- TRUE;
  if (is.null(indices)) {
    # Has write map?
    if (is.null(writeMap)) {
      indices <- seq_len(nbrOfIndices);
      reorder <- FALSE;
    } else {
      indices <- writeMap;
    }
  }

  if (reorder) {
    if (verbose >= 2)
      cat("Re-ordering data for optimal write order...");
    o <- order(indices);
    indices <- indices[o];
    if (!is.null(intensities))
      intensities <- intensities[o];
    if (!is.null(stdvs))
      stdvs <- stdvs[o];
    if (!is.null(pixels))
      pixels <- pixels[o];
    o <- NULL; # Not needed anymore
    if (verbose >= 2)
      cat("done.\n");
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Write data to file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  version <- header$version;
  if (version == 4) {
    # Open CEL file
    con <- file(filename, open="r+b");
    on.exit(close(con));

    # Skip CEL header (the header is read only to advance the connection)
    if (verbose >= 2)
      cat("Skipping to beginging of data section...");
    h <- .readCelHeaderV4(con);

    # "Cell entries - this consists of an intensity value, standard
    #  deviation value and pixel count for each cell in the array.
    #  The values are stored by row then column starting with the X=0,
    #  Y=0 cell. As an example, the first five entries are for cells
    #  defined by XY coordinates: (0,0), (1,0), (2,0), (3,0), (4,0).
    #  Type: (float, float, short) = 4 + 4 + 2 = 10 bytes / cell
    #  cellData <- c(readFloat(con), readFloat(con), readShort(con));
    sizeOfCell <- 10;

    # Current file position
    dataOffset <- seek(con, origin="start", rw="read");
    if (verbose >= 2)
      cat("done.\n");

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Update in chunks
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    CHUNK.SIZE <- 2^20; # = 1048576 indices == 10.5Mb

    # Work with zero-based indices
    indices <- indices - 1;

    count <- 1;
    offset <- dataOffset;
    # Only used for verbose output; a rough estimate, since the span of
    # cells covered by a chunk depends on how sparse the indices are.
    nbrOfChunks <- ceiling(length(indices) / CHUNK.SIZE);
    while (length(indices) > 0) {
      if (verbose >= 1) {
        cat(sprintf("Number of indices left: %d\n", length(indices)));
        cat(sprintf("Updating chunk #%d of %d...\n", count, nbrOfChunks));
      }

      # Recall: All indices are ordered!
      # Shift offset to the first index.
      firstIndex <- indices[1];
      offset <- offset + sizeOfCell*firstIndex;

      # Shift indices such that first index is zero.
      indices <- indices - firstIndex;

      # Identify the indices to update such no more than CHUNK.SIZE cells
      # are read/updated.  which.max() gives the position of the first
      # index at or beyond CHUNK.SIZE (or 1 if there is none; the first
      # index is always zero here).  Everything *before* that position
      # belongs to this chunk.
      n <- which.max(indices >= CHUNK.SIZE) - 1L;
      if (n == 0L)
        n <- length(indices);
      subset <- seq_len(n);

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # Read the data section of the CEL file
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (verbose >= 1)
        cat("Reading chunk data section...");
      seek(con, origin="start", where=offset, rw="read");
      rawAll <- readBin(con=con, what="raw", n=sizeOfCell*(indices[n]+1));
      if (verbose >= 1)
        cat("done.\n");

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # Update 'intensities'
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (!is.null(intensities)) {
        if (verbose >= 1)
          cat("Updating 'intensities'...");
        # Write floats (size=4) to raw vector
        bfr <- writeBin(intensities[subset], con=raw(), size=4,
                        endian="little");
        intensities <- intensities[-subset]; # Not needed anymore

        # Updated 'rawAll' accordingly; bytes 1-4 of each cell
        idx <- rep(sizeOfCell*indices[subset], each=4) + 1:4;
        rawAll[idx] <- bfr;
        idx <- bfr <- NULL; # Not needed anymore
        if (verbose >= 1)
          cat("done.\n");
      }

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # Update 'stdvs'
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (!is.null(stdvs)) {
        if (verbose >= 1)
          cat("Updating 'stdvs'...");
        # Write floats (size=4) to raw vector
        bfr <- writeBin(stdvs[subset], con=raw(), size=4, endian="little");
        stdvs <- stdvs[-subset]; # Not needed anymore

        # Updated 'rawAll' accordingly; bytes 5-8 of each cell
        idx <- rep(sizeOfCell*indices[subset], each=4) + 5:8;
        rawAll[idx] <- bfr;
        idx <- bfr <- NULL; # Not needed anymore
        if (verbose >= 1)
          cat("done.\n");
      }

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # Update 'pixels'
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (!is.null(pixels)) {
        if (verbose >= 1)
          cat("Updating 'pixels'...");
        # Write short integers (size=2) to raw vector
        bfr <- writeBin(pixels[subset], con=raw(), size=2, endian="little");
        pixels <- pixels[-subset]; # Not needed anymore

        # Updated 'rawAll' accordingly; bytes 9-10 of each cell
        idx <- rep(sizeOfCell*indices[subset], each=2) + 9:10;
        rawAll[idx] <- bfr;
        idx <- bfr <- NULL; # Not needed anymore
        if (verbose >= 1)
          cat("done.\n");
      }

      # Remove updated indices
      indices <- indices[-subset];
      subset <- NULL; # Not needed anymore

      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      # Write raw data back to file
      # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      if (verbose >= 1)
        cat("Writing chunk to data section...");
      seek(con, origin="start", where=offset, rw="write");
      writeBin(con=con, rawAll);
      if (verbose >= 1)
        cat("done.\n");

      rawAll <- NULL; # Not needed anymore

      if (verbose >= 1)
        cat(sprintf("Updating chunk #%d of %d...done\n", count, nbrOfChunks));
      count <- count + 1;
    } # while (...)
  } # if (version ...)

  invisible(filename);
}


############################################################################
# HISTORY:
# 2007-01-04
# o Added argument 'writeMap'.
# 2006-08-19
# o BUG FIX: Wow wow wow. This one was tricky to find. If not specifying
#   the 'rw' argument in seek() it defaults to "", which is not "read" as
#   I naively though (because I did not read the inner details of ?seek),
#   but the latest call to seek. In other words, since I at the end of
#   every "chunk" loop call seek(..., rw="write") the seek(..., [rw=""])
#   was equal to a seek(..., rw="write"), but I wanted seek(..., rw="read")!
#   That made updateCel() do funny things and write to the wrongs parts
#   of the file etc.
# 2006-08-18
# o BUG FIX: The new implementation where data was written to raw vectors
#   was incorrect. A lot of extra zeros was written. The "eastern egg" in
#   the updated example contains an image from http://tpo.berkeley.edu/.
# 2006-08-14 # o BUG FIX: updateCel() would in some cases give "Error: subscript out of # bounds" when writing the last chunk. # 2006-07-22 # o Update updateCel() to update data in chunks, because updating the # complete data section is expensive. For example, a 500K chip has # 6553600 cells each of size 10 bytes, i.e. >65Mb or raw memory. With # copying etc it costs >100-200Mb of memory to update a CEL file if only # the first *and* the last cell is updated. Now it should only be of # the order of 10-20Mb per chunk. # o Added verbose output to updateCel(). # o Now updateCel() deallocates objects as soon as possible in order to # free up as much memory as possible. Had memory problems with the 500K's. # 2006-07-21 # o updateCel() was really slow when updating a large number of cells. # Now the idea is to write to raw vectors stored in memory. By reading # the chunk of the CEL data section that is going to be updated as a raw # data vector and then updating this in memory first, and the re-write # that chuck of raw data to file, things are much faster. # o BUG FIX: updateCel(..., indices=NULL) would generate an error, because # we tried to reorder by order(indices). # 2006-06-19 # o Replace 'data' argument with arguments 'intensities', 'stdvs', and # 'pixels'. /HB # 2006-06-18 # o First version can update CEL v4 (binary) cell entries. Note that this # code does not make use of the Fusion SDK library. This may updated in # the future, but for now we just want something that works. # o Created. 
/HB ############################################################################ affxparser/R/updateCelUnits.R0000644000175200017520000001400714516003651017215 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction updateCelUnits # # @title "Updates a CEL file unit by unit" # # @synopsis # # \description{ # @get "title".\cr # # \emph{Please note that, contrary to @see "readCelUnits", this method # can only update a single CEL file at the time.} # } # # \arguments{ # \item{filename}{The filename of the CEL file.} # \item{cdf}{A (optional) CDF @list structure either with # field \code{indices} or fields \code{x} and \code{y}. # If @NULL, the unit names (and from there the cell indices) are # inferred from the names of the elements in \code{data}. # } # \item{data}{A @list structure in a format similar to what is returned # by @see "readCelUnits" for \emph{a single CEL file only}.} # \item{...}{Optional arguments passed to @see "readCdfCellIndices", # which is called if \code{cdf} is not given.} # \item{verbose}{An @integer specifying how much verbose details are # outputted.} # } # # \value{ # Returns what @see "updateCel" returns. # } # # \section{Working with re-arranged CDF structures}{ # Note that if the \code{cdf} structure is specified the CDF file is # \emph{not} queried, but all information about cell x and y locations, # that is, cell indices is expected to be in this structure. This can # be very useful when one work with a cdf structure that originates # from the underlying CDF file, but has been restructured for instance # through the @see "applyCdfGroups" method, and \code{data} # correspondingly. This update method knows how to update such # structures too. # } # # @examples "../incl/updateCelUnits.Rex" # # @author "HB" # # \seealso{ # Internally, @see "updateCel" is used. 
# }
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
updateCelUnits <- function(filename, cdf=NULL, data, ..., verbose=0) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'data':
  # (For now, any list is assumed to be a correct CEL structure)
  if (!is.list(data)) {
    stop("Argument 'data' must be a list: ", mode(data));
  }

  # Argument 'cdf':
  if (!is.list(cdf) && !is.null(cdf)) {
    stop("Argument 'cdf' must be a list or NULL: ", mode(cdf));
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Find out what cell indices are to be written
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (is.null(cdf)) {
    # No cell indices were given. Thus, pick the unit names from the 'data'
    # object and identify the unit indices. Then, read the cell indices
    # for these units.

    # i. Get the chip type
    chipType <- readCelHeader(filename)$chiptype;

    # ii. Find the CDF file
    cdfFile <- findCdf(chipType);

    # iii. Read all unit names
    unitNames <- readCdfUnitNames(cdfFile);  # CRASHES AFTER MULTIPLE CALLS!

    # iv. Map unit names to unit indices
    units <- match(names(data), unitNames);

    # v. Validate
    if (any(is.na(units))) {
      stop("Could not identify unit indices. Some unit names in argument 'data' do not exist in the CDF for '", chipType, "'.");
    }

    cdf <- readCdfCellIndices(cdfFile, units=units, ...);
    # Confirmed: 2006-08-22
  } else if (is.list(cdf)) {
    # Reformat 'cdf' to only contain cell 'indices'.  The structure is
    # inferred from the first group of the first unit.
    unit <- cdf[[1]];
    groups <- unit$groups;
    group <- groups[[1]];
    if ("indices" %in% names(group)) {
      if (!identical(names(group), "indices")) {
        cdf <- applyCdfGroups(cdf, FUN=cdfGetFields, fields="indices");
      }
    } else if (all(c("x", "y") %in% names(group))) {
      # Translate zero-based (x,y) positions to one-based cell indices
      ncol <- readCelHeader(filename)$cols;
      cdf <- applyCdfGroups(cdf, FUN=function(groups) {
        lapply(groups, FUN=function(group) {
          list(indices=as.integer(group$y*ncol+group$x+1));
        })
      })
    } else {
      stop("Argument 'cdf' must be a CDF structure with group fields 'indices' or 'x' & 'y': ", paste(names(group), collapse=", "));
    }
  }

  # For now, assume the 'cdf' contains cell 'indices' only.
  indices <- unlist(cdf, use.names=FALSE);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Investigate the 'data' list to see what fields are included.
  # We do this under the assumption that all units have the same
  # structure.
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  group <- data[[1]][[1]];
  fields <- names(group);
  # Pull out the fields that can be written to a CEL file
  fields <- intersect(fields, c("intensities", "stdvs", "pixels"));
  group <- NULL; # Not needed anymore

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Extract the data for each of these fields (one by one).
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  values <- list();
  for (field in fields) {
    tmp <- lapply(data, lapply, .subset2, field);
    values[[field]] <- unlist(tmp, use.names=FALSE);
    tmp <- NULL; # Not needed anymore
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Update CEL file
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'verbose' is forwarded; it was previously silently ignored.
  updateCel(filename, indices=indices, intensities=values$intensities,
            stdvs=values$stdvs, pixels=values$pixels, verbose=verbose);
} # updateCelUnits()


############################################################################
# HISTORY:
# 2006-08-22
# o There is some weird memory bug in at least Windows that makes R crash
#   on multiple (10-20) subsequent calls to readCdfNnn().  Have been
#   troubleshooting for a more than a day, but I can't find why it is.
#   Email HB for details.
# 2006-08-21
# o First test passed.
# o Created.
############################################################################
# affxparser/R/writeCcg.R0000644000175200017520000001644114516003651016037 0ustar00biocbuildbiocbuild

# Writes the file header of a Command Console generic data file to a
# connection: a magic byte (59), the version (1 byte), the number of data
# groups (4 bytes) and the file position of the first data group
# (4 bytes), all in big-endian byte order.
#
# Arguments:
#  con:    A connection opened for binary writing.
#  header: A list with elements 'nbrOfDataGroups' and 'dataGroupStart',
#          and optionally 'version' (defaults to 1).
#
# Returns (invisibly) the current write position of the connection.
.writeCcgFileHeader <- function(con, header, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  writeUByte <- function(con, value, ...) {
    writeBin(as.integer(value), con=con, size=1, endian="big");
  }

  writeInt <- function(con, value, ...) {
    writeBin(as.integer(value), con=con, size=4, endian="big");
  }

  writeUInt <- function(con, value, ...) {
    writeBin(as.integer(value), con=con, size=4, endian="big");
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate 'header':
  if (!is.list(header))
    stop("Argument 'header' must be a list: ", mode(header));
  if (is.null(header$nbrOfDataGroups))
    stop("Missing element 'nbrOfDataGroups' in argument 'header'.");
  if (is.null(header$dataGroupStart))
    stop("Missing element 'dataGroupStart' in argument 'header'.");

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Further validation
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Default version is 1 (one) [1]
  if (is.null(header$version))
    header$version <- as.integer(1);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Writing
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # The magic for Command Console generic data file format is 59 [1]
  writeUByte(con, value=59);

  # Write version
  writeUByte(con, value=header$version);

  # Write number of data groups
  writeInt(con, value=header$nbrOfDataGroups);

  # Write file position of first data groups
  # (previously the number of data groups was written here a second time)
  writeUInt(con, value=header$dataGroupStart);

  # Return the current file write position
  invisible(seek(con, origin="start", rw="write"));
} # .writeCcgFileHeader()


# Generic Data Header
# This section stores the file and file type identifiers, data to
# describe the contents of the file, parameters on how it was created
# and information about its parentage. This section contains a circular
# dependency so as to traverse across the entire parentage of a file.
# This information will provide the entire history of how a file came
# to be.
#
# The first data header section immediately follows the file header
# section.
#
# Item  Type      Description
#   1   GUID      The data type identifier.
#                 This is used to identify the type
#                 of data stored in the file. For example:
#                 * acquisition data (affymetrix-calvin-scan-acquisition)
#                 * intensity data (tbd)
#                 * expression results (tbd)
#                 * genotyping results (tbd)
#   2   GUID      Unique file identifier. This is the identifier to use to
#                 link the file with parent files. This identifier will be
#                 updated whenever the contents of the file change.
#                 Example: When a user manually aligns the grid in a DAT file
#                 the grid coordinates are updated in the DAT file and the file
#                 is given a new file identifier.
#   3   DATETIME  Date and time of file creation.
#   4   LOCALE    The locale of the operating system that the file was created on.
#   5   INT       The number of name/type/value parameters.
#   6   WVT[]     Array of parameters stored as name/value/type triplets.
#                 WVT[]=(WSTRING/VALUE/TYPE)[]
#   7   INT       Number of parent file headers.
#   8   GDH[]     Array of parent file headers. GDH[]=GenericDataHeader[]
#
# Writes (a stub of) a generic data header to a connection: the data type
# identifier, the file identifier and the timestamp, followed by the
# version, the number of data groups and the position of the first data
# group.
# NOTE(review): the three trailing fields mirror .writeCcgFileHeader() and
# do not correspond to items 4-8 of the layout documented above; the file
# history states that this file only contains a stub.
#
# Arguments:
#  con:    A connection opened for binary writing.
#  header: A list with elements 'dataTypeId', 'fileId', 'timestamp',
#          'nbrOfDataGroups' and 'dataGroupStart', and optionally
#          'version' (defaults to 1).
#
# Returns (invisibly) the current write position of the connection.
.writeCcgDataHeader <- function(con, header, ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  writeUByte <- function(con, value, ...) {
    writeBin(as.integer(value), con=con, size=1, endian="big");
  }

  writeInt <- function(con, value, ...) {
    writeBin(as.integer(value), con=con, size=4, endian="big");
  }

  writeUInt <- function(con, value, ...) {
    writeBin(as.integer(value), con=con, size=4, endian="big");
  }

  writeString <- function(con, str, ...) {
    # A 1 byte character string. A string object is stored as an INT
    # (to store the string length) followed by the CHAR array (to store
    # the string contents).
    str <- as.character(str);
    nchars <- as.integer(nchar(str));
    writeInt(con, value=nchars);
    # eos=NULL: no terminator is part of the layout described above;
    # the default of writeChar() would append a nul byte.
    writeChar(str, con=con, nchars=nchars, eos=NULL);
  }

  writeWChar <- function(object, con, nchars=nchar(object,type="chars"), ...) {
    # Two bytes per character
    str <- as.character(object);

    # Convert to unicode characters; the code of each character goes into
    # the second (low) byte of each pair, i.e. big-endian order.
    # utf8ToInt() of base R is used instead of charToInt(), which is not
    # defined or imported by this package.
    n <- nchar(str);
    bytes <- matrix(raw(2*n), nrow=2, ncol=n);
    codes <- utf8ToInt(str);
    bytes[2,] <- as.raw(codes);
    bytes <- as.vector(bytes);

    # Write raw buffer
    writeBin(bytes, con=con);
  }

  writeWString <- function(con, str, ...) {
    # A UNICODE string. A string object is stored as an INT (to store the
    # string length) followed by the WCHAR array (to store the string
    # contents).
    str <- as.character(str);
    nchars <- as.integer(nchar(str));
    writeInt(con, value=nchars);
    writeWChar(str, con=con, nchars=nchars);
  }

  writeGuid <- function(con, id, ...) {
    writeString(con, str=id, ...);
  }

  writeDateTime <- function(con, timestamp, ...) {
    writeWString(con, str=timestamp, ...);
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate 'header':
  if (!is.list(header))
    stop("Argument 'header' must be a list: ", mode(header));
  if (is.null(header$nbrOfDataGroups))
    stop("Missing element 'nbrOfDataGroups' in argument 'header'.");
  if (is.null(header$dataGroupStart))
    stop("Missing element 'dataGroupStart' in argument 'header'.");

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Further validation
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Default version is 1 (one) [1]
  if (is.null(header$version))
    header$version <- as.integer(1);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Writing
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Data type identifier
  writeGuid(con, id=header$dataTypeId);

  # Unique file identifier
  writeGuid(con, id=header$fileId);

  # Date and time of file creation
  # (the argument of writeDateTime() is 'timestamp', not 'id')
  writeDateTime(con, timestamp=header$timestamp);

  # Write version
  writeUByte(con, value=header$version);

  # Write number of data groups
  writeInt(con, value=header$nbrOfDataGroups);

  # Write file position of first data groups
  # (previously the number of data groups was written here a second time)
  writeUInt(con, value=header$dataGroupStart);

  # Return the current file write position
  invisible(seek(con, origin="start", rw="write"));
} # .writeCcgDataHeader()


############################################################################
# HISTORY:
# 2012-05-18
# o Now using stop() instead of throw().
# 2007-08-16
# o This file only contains a stub, so there is currently no
#   writeCcgHeader() or writeCcg().
# 2006-11-06
# o Created.
############################################################################
# affxparser/R/writeCdf.R0000644000175200017520000001132414516003651016032 0ustar00biocbuildbiocbuild

# Writes a CDF file from a CDF header, a list of units and a list of QC
# units.
#
# Arguments:
#  fname:     Pathname of the file to be written.
#  cdfheader: A list with (at least) elements 'nunits' and 'nqcunits';
#             passed on to writeCdfHeader().
#  cdf:       A named list of units; each unit has elements 'ncells' and
#             'groups'.  Its length must equal cdfheader$nunits.
#  cdfqc:     A list of QC units; each has an element 'ncells'.  Its
#             length must equal cdfheader$nqcunits.
#  overwrite: If FALSE (default), an existing file gives an error.
#  verbose:   Verbose level; also passed to the writeCdfNnn() functions.
#
# Returns (invisibly) NULL.
writeCdf <- function(fname, cdfheader, cdf, cdfqc, overwrite=FALSE, verbose=0) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Consistency checks
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (length(cdf) != cdfheader$nunits) {
    stop("Number of units in 'cdf' does not match the number of units in the CDF header: ", length(cdf), " != ", cdfheader$nunits);
  }

  if (length(cdfqc) != cdfheader$nqcunits) {
    stop("Number of QC units in 'cdfqc' does not match the number of QC units in the CDF header: ", length(cdfqc), " != ", cdfheader$nqcunits);
  }

  if(verbose >= 1) {
    cat("Writing CDF file...\n");
    cat("  Pathname: ", fname, "\n", sep="");
  }

  if(file.exists(fname) && !overwrite)
    stop("Cannot write CDF: File already exists: ", fname);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Figure out number of bytes per QC unit and regular unit
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (cdfheader$nqcunits > 0) {
    # Start positions for QC units
    # Number of bytes: 6 + 7*nbrOfCells bytes
    lens <- lapply(cdfqc, FUN=.subset2, "ncells");
    lens <- unlist(lens, use.names=FALSE);
    lens <- 6 + 7*lens;
    qcUnitLengths <- lens;
  } else {
    qcUnitLengths <- NULL;
  }

  if (cdfheader$nunits > 0) {
    # Start positions for units
    # Number of bytes: 20 + (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes
    lens <- lapply(cdf, FUN=function(unit) {
      ncells <- .subset2(unit, "ncells");
      ngroups <- length(.subset2(unit, "groups"));
      20 + 82*ngroups + 14*ncells;
    })
    lens <- unlist(lens, use.names=FALSE);
    unitLengths <- lens;
  } else {
    unitLengths <- NULL;
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Write CDF
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Open output connection (closed when the function exits)
  con <- file(fname, open = "wb");
  on.exit(close(con));

  # Write CDF header
  writeCdfHeader(con=con, cdfheader, unitNames=names(cdf),
                 qcUnitLengths=qcUnitLengths, unitLengths=unitLengths,
                 verbose=verbose);

  # Write QC units
  writeCdfQcUnits(con=con, cdfqc, verbose=verbose);

  # Write regular units
  writeCdfUnits(con=con, cdf, verbose=verbose);

  if(verbose >= 1)
    cat("Writing CDF file...done\n");

  invisible(NULL);
} # writeCdf()


############################################################################
# HISTORY:
# 2012-05-18
# o Now using stop() instead of throw().
# 2007-01-10 /HB
# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits().  With
#   these it is now possible to build up the CDF in chunks.
# o Removed obsolete arguments 'addName' and 'addPositions' and all related
#   code. Internal variable 'positions' is not needed anymore.
#   There are no more seek():s in the code.
# o Removed obsolete .writeCdfUnit2().
# o Now only every 1000th unit (instead of 100th) is reported. It is now
#   also a count down.
# 2006-12-18 /KS
# o Make global replacement "block" -> "group" to maintain consistency
#   with other code, pursuant to communication from KH.
# 2006-10-25 /HB (+KS)
# o BUG FIX: .initializeCdf() was writing false file offset for QC units
#   when the number QC nunits were zero.  This would core dump readCdfNnn().
# 2006-09-21 /HB
# o BUG FIX: The 'atom' and 'indexpos' fields were swapped.
# o Now suppressing warnings "writeChar: more characters requested..." in
#   writeCdf().
# 2006-09-11 /HB
# o BUG FIX: nrows & ncols were swapped in the CDF header.
# 2006-09-09 /HB
# o Updated writeCdf() has been validate with compareCdfs() on a few arrays.
# o With the below "optimizations" writeCdf() now writes Hu6800.CDF with
#   units in 130s compared to 140s.
# o Now initializeCdf() dumps all unit names at once by first building a
#   raw vector. This is now much faster than before.
# o Now writeCdf() does not seek() around in the file anymore. This should
#   speed up writing at least a bit.
# o Made some optimization, which speeds up the writing a bit. Jumping
#   around in the file with seek() is expensive and should be avoided.
# o Rename writeUnit() to writeCdfUnit() and same for the QC function.
# o Added more verbose output and better errror messages for writeCdf().
# 2006-09-07 /HB
# o Maybe initalizeCdf(), writeUnit(), and writeQcUnit() should be made
#   private functions of this package.
# o Removed textCdf2binCdf() skeleton. See convertCdf() instead.
# o Updated writeCdf() such that the connection is guaranteed to be closed
#   regardless.
############################################################################
# affxparser/R/writeCdf.private.R0000644000175200017520000003354414516003651017513 0ustar00biocbuildbiocbuild
# Writes the leading part of a binary CDF file: the CDF header, the table
# of unit names, and the file positions of all QC units and units.
#
# Arguments:
#  con:           An open, writable binary connection positioned at byte 0.
#  nRows, nCols:  Number of rows and columns of cells on the array.
#  nUnits:        Number of (ordinary) units.
#  nQcUnits:      Number of QC units.
#  refSeq:        A single string; the resequencing reference sequence.
#  unitnames:     Character vector of nUnits unit names.  Each name is
#                 stored in a 64-byte slot including a terminating NUL, so
#                 a name may occupy at most 63 bytes.
#  qcUnitLengths: Number of bytes of each QC unit.
#  unitLengths:   Number of bytes of each unit.
#  ...:           Not used.
#
# Returns NULL invisibly.
.initializeCdf <- function(con, nRows = 1, nCols = 1, nUnits = 1, nQcUnits = 0, refSeq = "", unitnames = rep("", nUnits), qcUnitLengths = rep(0, nQcUnits), unitLengths = rep(0, nUnits), ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if(length(qcUnitLengths) != nQcUnits) {
    stop("Number of elements in argument 'qcUnitLengths' does not match 'nQcUnits'");
  }

  if(length(unitLengths) != nUnits) {
    stop("Number of elements in argument 'unitLengths' does not match 'nUnits'");
  }

  if(length(unitnames) != nUnits) {
    stop("Number of elements in argument 'unitnames' does not match 'nUnits'");
  }

  if(length(refSeq) != 1) {
    stop("Argument 'refSeq' should be a single character.");
  }

  # A name must leave room for the terminating NUL in its 64-byte slot.
  # Without this check a too long name gives a zero or negative index into
  # 'pads' below, which corrupts the padding of the whole chunk.
  tooLong <- which(nchar(unitnames, type="bytes") > 63);
  if (length(tooLong) > 0) {
    stop("Unit names must not be longer than 63 bytes: ", unitnames[tooLong[1]]);
  }

  lrefSeq <- nchar(refSeq);

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # CDF header
  #
  # 1 Magic number. Always set to 67. [integer]
  # 2 Version number. [integer]
  # 3 The number of columns of cells on the array. [unsigned short]
  # 4 The number of rows of cells on the array. [unsigned short]
  # 5 The number of units in the array not including QC units. The term
  #   unit is an internal term which means probe set. [integer]
  # 6 The number of QC units. [integer]
  # 7 The length of the resequencing reference sequence. [integer]
  # 8 The resequencing reference sequence. [char[len]]
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  ## Magic number and version number
  writeBin(object = as.integer(c(67, 1)), con = con, size = 4, endian = "little")
  ## NCols, NRows
  writeBin(object = as.integer(c(nCols, nRows)), con = con, size = 2, endian = "little")
  ## NumberUnits, NumberQCUnits
  writeBin(object = as.integer(c(nUnits, nQcUnits)), con = con, size = 4, endian = "little")
  ## Length of refSeqsequence
  writeBin(object = as.integer(lrefSeq), con = con, size = 4, endian = "little")

  # 2*4 + 2*2 + 2*4 + 4 = 24 bytes written so far
  offset <- 24;
  fOffset <- seek(con=con, origin="start", rw="write");
  if (offset != fOffset) {
    stop("File format write error (step 1): File offset is not the excepted one: ", fOffset, " != ", offset);
  }

  ## RefSeqsequece
  if(lrefSeq > 0)
    writeChar(as.character(refSeq), con=con, eos=NULL);

  # Current offset
  offset <- offset + lrefSeq;
  fOffset <- seek(con=con, origin="start", rw="write");
  if (offset != fOffset) {
    stop("File format write error (step 2): File offset is not the excepted one: ", fOffset, " != ", offset);
  }


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Unit names
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Write to raw vector (2*10^6 units => 122Mb; should be ok for now)
  # Since we can't create strings with '\0':s, we use '\xFF',
  # write to raw and then replace '\xFF' with '\0'.  Thus, unit names with
  # '\xFF' are invalid, but this should not be a real problem.
  #
  # pads[k] holds k-1 '\xFF' characters.  A name of n characters is padded
  # with pads[64-n], i.e. 63-n characters; the 64th byte is the string
  # terminator that writeBin() appends to each string.
  pads <- vapply(0:64, FUN=function(x) paste(rep("\xFF", x), collapse=""), FUN.VALUE=character(1));

  # Write the unit names in chunks to save memory
  nbrOfUnits <- length(unitnames);
  chunkSize <- 100000;
  nbrOfChunks <- ceiling(nbrOfUnits / chunkSize);

  # seq_len() and not 1:n, so that nothing is written when there are no units
  for (kk in seq_len(nbrOfChunks)) {
    # Units for this chunk
    from <- (kk-1)*chunkSize+1;
    to <- min(from+chunkSize-1, nbrOfUnits);
    unitnamesFF <- unitnames[from:to];

    # Pad the unit names
    unitnamesFF <- paste(unitnamesFF, pads[64-nchar(unitnamesFF)], sep="");

    # Write unit names to a raw vector; with a raw 'con', writeBin()
    # returns the bytes instead of writing them anywhere.
    bfr <- writeBin(con=raw(), unitnamesFF, size=1);
    unitnamesFF <- NULL; # Not needed anymore

    # Replace all '\xFF' with '\0'.
    bfr[bfr == as.raw(255)] <- as.raw(0);

    writeBin(con=con, bfr);
    bfr <- NULL; # Not needed anymore
  } # for (kk in ...)

  bytesOfUnitNames <- 64 * nUnits;
  offset <- offset + bytesOfUnitNames;

  fOffset <- seek(con=con, origin="start", rw="write");
  if (offset != fOffset) {
    stop("File format write error (step 3): File offset is not the excepted one: ", fOffset, " != ", offset);
  }

  # The two position tables (4 bytes per QC unit and per unit) precede
  # the first QC unit.
  bytesOfQcUnits <- 4 * nQcUnits;
  offset <- offset + bytesOfQcUnits;

  bytesOfUnits <- 4 * nUnits;
  offset <- offset + bytesOfUnits;

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # QC units file positions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (nQcUnits > 0) {
    csum <- cumsum(qcUnitLengths);
    nextOffset <- csum[nQcUnits];
    starts <- c(0, csum[-nQcUnits]);
    starts <- as.integer(offset + starts);
    writeBin(starts, con = con, size = 4, endian = "little")
  } else {
    nextOffset <- 0;
  }

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Units file positions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # The first unit starts right after the last QC unit
  offset <- offset + nextOffset;
  if (nUnits > 0) {
    csum <- cumsum(unitLengths);
    starts <- c(0, csum[-nUnits]);
    starts <- as.integer(offset + starts);
    writeBin(starts, con = con, size = 4, endian = "little");
  }

  invisible(NULL);
} # .initializeCdf()



# Writes one (ordinary) CDF unit, including all of its groups and cells.
#
# Arguments:
#  unit:     A list with elements 'unittype', 'unitdirection', 'natoms',
#            'groups', 'ncells', 'unitnumber' and 'ncellsperatom'.  Unit
#            type and direction may be given either as numeric codes or
#            as one of the names listed in the switch() calls below.
#  con:      An open, writable binary connection.
#  unitname: Not used.
.writeCdfUnit <- function(unit, con, unitname=NULL) {
  ## 3. Write the unit
  unitType <- unit$unittype
  if (!is.numeric(unitType)) {
    unitType <- switch(unitType,
                       unknown = 0,
                       expression = 1,
                       genotyping = 2,
                       resequencing = 3,
                       tag = 4,
                       copynumber = 5,
                       genotypingcontrol = 6,
                       expressioncontrol = 7,
                       stop("Unknown unit type: ", unitType))
  }

  unitDirection <- unit$unitdirection
  if (!is.numeric(unitDirection)) {
    unitDirection <- switch(unitDirection,
                            nodirection = 0,
                            sense = 1,
                            antisense = 2,
                            unknown = 3,
                            stop("Unknown unit direction: ", unitDirection))
  }

  unitInfo <- as.integer(c(unitType, unitDirection,
                           unit$natoms, length(unit$groups),
                           unit$ncells, unit$unitnumber,
                           unit$ncellsperatom))

  # Number of bytes: 2+1+4*4+1=20 bytes
  writeBin(unitInfo[1], con = con, size = 2, endian = "little")
  writeBin(unitInfo[2], con = con, size = 1, endian = "little")
  writeBin(unitInfo[3:6], con = con, size = 4, endian = "little")
  writeBin(unitInfo[7], con = con, size = 1, endian = "little")

  ## Writing each group in turn
  # Number of bytes: (18+64)*nbrOfGroups + 14*totalNbrOfCells bytes
  for(igroup in seq_along(unit$groups)) {
    group <- unit$groups[[igroup]]

    # Numeric codes are written as is.  They must not be passed to
    # switch(), which would select an alternative by position and thereby
    # encode the direction off by one.
    groupDirection <- group$groupdirection
    if (!is.numeric(groupDirection)) {
      groupDirection <- switch(groupDirection,
                               nodirection = 0,
                               sense = 1,
                               antisense = 2,
                               unknown = 3,
                               stop("Unknown group direction: ", groupDirection))
    }

    groupInfo <- as.integer(c(group$natoms, length(group$x),
                              group$ncellsperatom,
                              groupDirection, min(group$atoms, 0)))

    # Number of bytes: 2*4+2*1+2*4=18 bytes
    writeBin(groupInfo[1:2], con = con, size = 4, endian = "little")
    writeBin(groupInfo[3:4], con = con, size = 1, endian = "little")
    # NOTE(review): 'groupInfo' normally has five elements, in which case
    # the sixth value written here is NA_integer_.  Presumably this is an
    # unused field of the file format - confirm before changing.
    writeBin(groupInfo[5:6], con = con, size = 4, endian = "little")

    # Number of bytes: 64 bytes
    # (writeChar() warns when it has to zero-pad up to 'nchars')
    suppressWarnings({
      writeChar(as.character(names(unit$groups)[igroup]),
                con = con, nchars = 64, eos = NULL)
    })

    ## Writing each cell in turn
    cells <- matrix(as.integer(c(group$indexpos, group$x,
                                 group$y, group$atom)), ncol = 4)

    # Number of bytes: 14*nbrOfCells bytes
    for(icell in seq_along(group$x)) {
      # Number of bytes: 1*4+2*2+1*4+1*2=14 bytes
      writeBin(cells[icell, 1], con = con, size = 4, endian = "little")
      writeBin(cells[icell, 2:3], con = con, size = 2, endian = "little")
      writeBin(cells[icell, 4], con = con, size = 4, endian = "little")
      writeChar(as.character(c(group$pbase[icell], group$tbase[icell])),
                con = con, nchars = c(1,1), eos = NULL)
    } # for (icell ...)
  } # for (igroup ...)
} # .writeCdfUnit()



# Writes one CDF QC unit and all of its cells.
#
# Arguments:
#  qcunit: A list with elements 'type', 'ncells', 'x', 'y', 'length',
#          'pm' and 'background'.  The type may be given either as a
#          numeric code or as one of the names listed in switch() below.
#  con:    An open, writable binary connection.
.writeCdfQcUnit <- function(qcunit, con) {
  ## 2. Actually write the qcunit
  type <- qcunit$type;
  if (!is.numeric(type)) {
    type <- switch(type,
                   unknown = 0,
                   checkerboardNegative = 1,
                   checkerboardPositive = 2,
                   hybeNegative = 3,
                   hybePositive = 4,
                   textFeaturesNegative = 5,
                   textFeaturesPositive = 6,
                   centralNegative = 7,
                   centralPositive = 8,
                   geneExpNegative = 9,
                   geneExpPositive = 10,
                   cycleFidelityNegative = 11,
                   cycleFidelityPositive = 12,
                   centralCrossNegative = 13,
                   centralCrossPositive = 14,
                   crossHybeNegative = 15,
                   crossHybePositive = 16,
                   SpatialNormNegative = 17,
                   SpatialNormPositive = 18,
                   stop("Unknown QC unit type: ", type))
  }

  # Write 2 + 4 bytes
  qcunitInfo <- as.integer(c(type, qcunit$ncells))
  writeBin(qcunitInfo[1], con = con, size = 2, endian = "little")
  writeBin(qcunitInfo[2], con = con, size = 4, endian = "little")

  # Write 2*2 + 3*1 = 7 bytes per cell
  cells <- matrix(as.integer(c(qcunit$x, qcunit$y, qcunit$length,
                               qcunit$pm, qcunit$background)), ncol = 5)

  for(icell in seq_along(qcunit$x)) {
    writeBin(cells[icell, 1:2], con = con, size = 2, endian = "little")
    writeBin(cells[icell, 3:5], con = con, size = 1, endian = "little")
  }
} # .writeCdfQcUnit()


############################################################################
# HISTORY:
# 2013-06-29
# o BUG FIX: Since affxparser 1.30.2/1.31.2, .writeCdfUnit() encoded unit
#   types incorrectly, iff specified as integers.
# o BUG FIX: Likewise, .writeCdfUnit() has always encoded unit directions # incorrectly, iff specified as integers. Moreover, .writeCdfQcUnit() # has always encoded unit types incorrectly, iff specified as integers. # 2013-05-25 /HB # o Removed all gc() in .initializeCdf(). # 2013-01-07 /HB # o GENERALIZATION: .writeCdfUnit() now also encodes unit types # 'genotypingcontrol' and 'expressioncontrol'. # o BUG FIX: .writeCdfUnit() incorrectly encoded the 'unknown' unit type # as 5 and not 0. # 2008-08-09 /HB # o BUG FIX: .writeCdfUnit() did output unit type 'resequencing' and 'tag' # as 4 and 3, and not 3 and 4, respectively. # 2007-11-13 /KH # o BUG FIX: The error message in internal .initializeCdf() would mention # 'qcUnitLengths' when it was meant to say 'unitLengths'. # 2007-07-13 /HB # o While writing unit names in .initializeCdf(), quite a few copies were # created using up a lot of memory. By removing unused objects and # writing unit names in chunks memory usage is now stable and < 200MB. # 2007-02-01 /HB # o Updated to camel case as much as possible to match JBs updates in the # branch. # o Removed non-used arguments 'unitpositions' and 'qcunitpositions' from # .initializeCdf(). # 2007-01-10 /HB # o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). With # these it is now possible to build up the CDF in chunks. # o Removed obsolete arguments 'addName' and 'addPositions' and all related # code. Internal variable 'positions' is not needed anymore. # There are no more seek():s in the code. # o Removed obsolete .writeCdfUnit2(). # o Now only every 1000th unit (instead of 100th) is reported. It is now # also a count down. # 2006-12-18 /KS # o Make global replacement "block" -> "group" to maintain consistency # with other code, pursuant to communication from KH. # 2006-10-25 /HB (+KS) # o BUG FIX: .initializeCdf() was writing false file offset for QC units # when the number QC nUnits were zero. This would core dump readCdfNnn(). 
# 2006-09-21 /HB # o BUG FIX: The 'atom' and 'indexpos' fields were swapped. # o Now suppressing warnings "writeChar: more characters requested..." in # writeCdf(). # 2006-09-11 /HB # o BUG FIX: nRows & nCols were swapped in the CDF header. # 2006-09-09 /HB # o Updated writeCdf() has been validate with compareCdfs() on a few arrays. # o With the below "optimizations" writeCdf() now writes Hu6800.CDF with # units in 130s compared to 140s. # o Now initializeCdf() dumps all unit names at once by first building a # raw vector. This is now much faster than before. # o Now writeCdf() does not seek() around in the file anymore. This should # speed up writing at least a bit. # o Made some optimization, which speeds up the writing a bit. Jumping # around in the file with seek() is expensive and should be avoided. # o Rename writeUnit() to writeCdfUnit() and same for the QC function. # o Added more verbose output and better errror messages for writeCdf(). # 2006-09-07 /HB # o Maybe initalizeCdf(), writeUnit(), and writeQcUnit() should be made # private functions of this package. # o Removed textCdf2binCdf() skeleton. See convertCdf() instead. # o Updated writeCdf() such that the connection is guaranteed to be closed # regardless. ############################################################################ affxparser/R/writeCdfHeader.R0000644000175200017520000000770614516003651017154 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction writeCdfHeader # # @title "Writes a CDF header" # # @synopsis # # \description{ # @get "title". # \emph{This method is not intended to be used explicitly. 
#  To write a CDF, use @see "writeCdf" instead.}
# }
#
# \arguments{
#   \item{con}{An open @connection to which nothing has been written.}
#   \item{cdfHeader}{A CDF header @list structure.}
#   \item{unitNames}{A @character @vector of all unit names.}
#   \item{qcUnitLengths}{An @integer @vector of all the number of bytes
#      in each of the QC units.}
#   \item{unitLengths}{An @integer @vector of all the number of bytes
#      in each of the (ordinary) units.}
#   \item{verbose}{An @integer specifying how much verbose details are
#      outputted.}
# }
#
# \value{
#   Returns nothing.
# }
#
# @author "HB"
#
# \seealso{
#   This method is called by @see "writeCdf".
#   See also @see "writeCdfQcUnits" and @see "writeCdfUnits".
# }
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
writeCdfHeader <- function(con, cdfHeader, unitNames, qcUnitLengths, unitLengths, verbose=0) {
  # Progress is reported at verbosity level one and above
  if (verbose >= 1) {
    cat(" Writes CDF header and unit names...\n")
  }

  # Unpack the fields of the header that the low-level writer needs
  nbrOfRows <- cdfHeader$nrows
  nbrOfColumns <- cdfHeader$ncols
  nbrOfUnits <- cdfHeader$nunits
  nbrOfQcUnits <- cdfHeader$nqcunits
  referenceSequence <- cdfHeader$refseq

  .initializeCdf(
    con = con,
    nRows = nbrOfRows,
    nCols = nbrOfColumns,
    nUnits = nbrOfUnits,
    nQcUnits = nbrOfQcUnits,
    refSeq = referenceSequence,
    unitnames = unitNames,
    qcUnitLengths = qcUnitLengths,
    unitLengths = unitLengths
  )

  if (verbose >= 1) {
    cat(" Writes CDF header and unit names...done\n")
  }
} # writeCdfHeader()


############################################################################
# HISTORY:
# 2007-02-01 /HB
# o Added Rdoc comments.
# 2007-01-10 /HB
# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). With
#   these it is now possible to build up the CDF in chunks.
# o Removed obsolete arguments 'addName' and 'addPositions' and all related
#   code. Internal variable 'positions' is not needed anymore.
#   There are no more seek():s in the code.
# o Removed obsolete .writeCdfUnit2().
# o Now only every 1000th unit (instead of 100th) is reported. It is now
#   also a count down.
# 2006-12-18 /KS # o Make global replacement "block" -> "group" to maintain consistency # with other code, pursuant to communication from KH. # 2006-10-25 /HB (+KS) # o BUG FIX: .initializeCdf() was writing false file offset for QC units # when the number QC nunits were zero. This would core dump readCdfNnn(). # 2006-09-21 /HB # o BUG FIX: The 'atom' and 'indexpos' fields were swapped. # o Now suppressing warnings "writeChar: more characters requested..." in # writeCdf(). # 2006-09-11 /HB # o BUG FIX: nrows & ncols were swapped in the CDF header. # 2006-09-09 /HB # o Updated writeCdf() has been validate with compareCdfs() on a few arrays. # o With the below "optimizations" writeCdf() now writes Hu6800.CDF with # units in 130s compared to 140s. # o Now initializeCdf() dumps all unit names at once by first building a # raw vector. This is now much faster than before. # o Now writeCdf() does not seek() around in the file anymore. This should # speed up writing at least a bit. # o Made some optimization, which speeds up the writing a bit. Jumping # around in the file with seek() is expensive and should be avoided. # o Rename writeUnit() to writeCdfUnit() and same for the QC function. # o Added more verbose output and better errror messages for writeCdf(). # 2006-09-07 /HB # o Maybe initalizeCdf(), writeUnit(), and writeQcUnit() should be made # private functions of this package. # o Removed textCdf2binCdf() skeleton. See convertCdf() instead. # o Updated writeCdf() such that the connection is guaranteed to be closed # regardless. ############################################################################ affxparser/R/writeCdfQcUnits.R0000644000175200017520000000726414516003651017351 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction writeCdfQcUnits # # @title "Writes CDF QC units" # # @synopsis # # \description{ # @get "title". # \emph{This method is not intended to be used explicitly. 
#  To write a CDF, use @see "writeCdf" instead.}
# }
#
# \arguments{
#   \item{con}{An open @connection to which a CDF header already has
#      been written by @see "writeCdfHeader".}
#   \item{cdfQcUnits}{A @list structure of CDF QC units as returned by
#      @see "readCdf" (\emph{not} @see "readCdfUnits").}
#   \item{verbose}{An @integer specifying how much verbose details are
#      outputted.}
# }
#
# \value{
#   Returns nothing.
# }
#
# @author "HB"
#
# \seealso{
#   This method is called by @see "writeCdf".
#   See also @see "writeCdfHeader" and @see "writeCdfUnits".
# }
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
writeCdfQcUnits <- function(con, cdfQcUnits, verbose=0) {
  # Level 1 reports start and end; level 2 adds a count down
  if (verbose >= 1) {
    cat(" Writes QC units...\n")
    if (verbose >= 2) {
      cat(" Units left: ")
    }
  }

  total <- length(cdfQcUnits)
  idx <- 0L
  while (idx < total) {
    idx <- idx + 1L

    # Report the number of remaining units after every 1000th unit
    if (verbose >= 2 && idx %% 1000 == 0) {
      cat(total-idx, ", ", sep="")
    }

    .writeCdfQcUnit(qcunit=.subset2(cdfQcUnits, idx), con=con)
  }

  if (verbose >= 2) {
    cat("0\n")
  }

  if (verbose >= 1) {
    cat(" Writes QC units...done\n")
  }
} # writeCdfQcUnits()


############################################################################
# HISTORY:
# 2007-02-01 /HB
# o Added Rdoc comments.
# 2007-01-10 /HB
# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). With
#   these it is now possible to build up the CDF in chunks.
# o Removed obsolete arguments 'addName' and 'addPositions' and all related
#   code. Internal variable 'positions' is not needed anymore.
#   There are no more seek():s in the code.
# o Removed obsolete .writeCdfUnit2().
# o Now only every 1000th unit (instead of 100th) is reported. It is now
#   also a count down.
# 2006-12-18 /KS
# o Make global replacement "block" -> "group" to maintain consistency
#   with other code, pursuant to communication from KH.
# 2006-10-25 /HB (+KS) # o BUG FIX: .initializeCdf() was writing false file offset for QC units # when the number QC nunits were zero. This would core dump readCdfNnn(). # 2006-09-21 /HB # o BUG FIX: The 'atom' and 'indexpos' fields were swapped. # o Now suppressing warnings "writeChar: more characters requested..." in # writeCdf(). # 2006-09-11 /HB # o BUG FIX: nrows & ncols were swapped in the CDF header. # 2006-09-09 /HB # o Updated writeCdf() has been validate with compareCdfs() on a few arrays. # o With the below "optimizations" writeCdf() now writes Hu6800.CDF with # units in 130s compared to 140s. # o Now initializeCdf() dumps all unit names at once by first building a # raw vector. This is now much faster than before. # o Now writeCdf() does not seek() around in the file anymore. This should # speed up writing at least a bit. # o Made some optimization, which speeds up the writing a bit. Jumping # around in the file with seek() is expensive and should be avoided. # o Rename writeUnit() to writeCdfUnit() and same for the QC function. # o Added more verbose output and better errror messages for writeCdf(). # 2006-09-07 /HB # o Maybe initalizeCdf(), writeUnit(), and writeQcUnit() should be made # private functions of this package. # o Removed textCdf2binCdf() skeleton. See convertCdf() instead. # o Updated writeCdf() such that the connection is guaranteed to be closed # regardless. ############################################################################ affxparser/R/writeCdfUnits.R0000644000175200017520000000754714516003651017071 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction writeCdfUnits # # @title "Writes CDF units" # # @synopsis # # \description{ # @get "title". # \emph{This method is not intended to be used explicitly. 
#  To write a CDF, use @see "writeCdf" instead.}
# }
#
# \arguments{
#   \item{con}{An open @connection to which a CDF header and QC units
#      already have been written by @see "writeCdfHeader" and
#      @see "writeCdfQcUnits", respectively.}
#   \item{cdfUnits}{A @list structure of CDF units as returned by
#      @see "readCdf" (\emph{not} @see "readCdfUnits").}
#   \item{verbose}{An @integer specifying how much verbose details are
#      outputted.}
# }
#
# \value{
#   Returns nothing.
# }
#
# @author "HB"
#
# \seealso{
#   This method is called by @see "writeCdf".
#   See also @see "writeCdfHeader" and @see "writeCdfQcUnits".
# }
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
writeCdfUnits <- function(con, cdfUnits, verbose=0) {
  total <- length(cdfUnits)

  # Level 1 reports start and end; level 2 adds a count down
  if (verbose >= 1) {
    cat(" Writes ", total, " units...\n", sep="")
    if (verbose >= 2) {
      cat(" Units left: ")
    }
  }

  allNames <- names(cdfUnits)
  idx <- 0L
  while (idx < total) {
    idx <- idx + 1L

    # Report the number of remaining units after every 1000th unit
    if (verbose >= 2 && idx %% 1000 == 0) {
      cat(total-idx, ", ", sep="")
    }

    .writeCdfUnit(unit=.subset2(cdfUnits, idx), unitname=.subset(allNames, idx), con=con)
  }

  if (verbose >= 2) {
    cat("0\n")
  }

  if (verbose >= 1) {
    cat(" Writes ", total, " units...done\n", sep="")
  }
} # writeCdfUnits()


############################################################################
# HISTORY:
# 2007-02-01 /HB
# o Added Rdoc comments.
# 2007-01-10 /HB
# o Added writeCdfHeader(), writeCdfQcUnits() and writeCdfUnits(). With
#   these it is now possible to build up the CDF in chunks.
# o Removed obsolete arguments 'addName' and 'addPositions' and all related
#   code. Internal variable 'positions' is not needed anymore.
#   There are no more seek():s in the code.
# o Removed obsolete .writeCdfUnit2().
# o Now only every 1000th unit (instead of 100th) is reported. It is now
#   also a count down.
# 2006-12-18 /KS # o Make global replacement "block" -> "group" to maintain consistency # with other code, pursuant to communication from KH. # 2006-10-25 /HB (+KS) # o BUG FIX: .initializeCdf() was writing false file offset for QC units # when the number QC nunits were zero. This would core dump readCdfNnn(). # 2006-09-21 /HB # o BUG FIX: The 'atom' and 'indexpos' fields were swapped. # o Now suppressing warnings "writeChar: more characters requested..." in # writeCdf(). # 2006-09-11 /HB # o BUG FIX: nrows & ncols were swapped in the CDF header. # 2006-09-09 /HB # o Updated writeCdf() has been validate with compareCdfs() on a few arrays. # o With the below "optimizations" writeCdf() now writes Hu6800.CDF with # units in 130s compared to 140s. # o Now initializeCdf() dumps all unit names at once by first building a # raw vector. This is now much faster than before. # o Now writeCdf() does not seek() around in the file anymore. This should # speed up writing at least a bit. # o Made some optimization, which speeds up the writing a bit. Jumping # around in the file with seek() is expensive and should be avoided. # o Rename writeUnit() to writeCdfUnit() and same for the QC function. # o Added more verbose output and better errror messages for writeCdf(). # 2006-09-07 /HB # o Maybe initalizeCdf(), writeUnit(), and writeQcUnit() should be made # private functions of this package. # o Removed textCdf2binCdf() skeleton. See convertCdf() instead. # o Updated writeCdf() such that the connection is guaranteed to be closed # regardless. ############################################################################ affxparser/R/writeCelHeader.R0000644000175200017520000001510014516003651017146 0ustar00biocbuildbiocbuild#########################################################################/** # @RdocFunction writeCelHeader # # @title "Writes a CEL header to a connection" # # @synopsis # # \description{ # @get "title". 
# }
#
# \arguments{
#   \item{con}{A @connection.}
#   \item{header}{A @list structure describing the CEL header, similar
#     to the structure returned by @see "readCelHeader".}
#   \item{outputVersion}{A @character string specifying the output format.
#     Currently only CEL version 4 (binary;XDA) is supported.}
#   \item{...}{Not used.}
# }
#
# \value{
#   Returns nothing.
# }
#
# \details{
#   Currently only CEL version 4 (binary;XDA) headers can be written.
# }
#
# \section{Redundant fields}{
#   The CEL v4 header contains redundant information. To avoid inconsistency
#   this method generates such redundant values from the original values.
#   This is consistent to how the CEL reader in Fusion SDK does it, cf.
#   @see "readCelHeader".  The redundant information is in the (CEL v3)
#   \code{header} field, which contains the CEL header information as it
#   would appear in the CEL v3 format.  This in turn contains a DAT header
#   field reproducing the DAT header from the image analysis.  It is from
#   this DAT header that the chip type is extracted.
# }
#
# @author "HB"
#
# @keyword "file"
# @keyword "IO"
# @keyword "internal"
#*/#########################################################################
writeCelHeader <- function(con, header, outputVersion=c("4"), ...) {
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  writeDWord <- function(con, data, ...) {
    writeBin(con=con, as.integer(data), size=4, endian="little");
  }

  writeInteger <- function(con, data, ...) {
    writeBin(con=con, as.integer(data), size=4, endian="little");
  }

  writeString <- function(con, str, ...) {
    # Strings must not be null terminated! /HB 2006-09-10
    writeInteger(con=con, nchar(str));
    writeChar(con=con, str, eos=NULL);
  }

  writeCelHeaderV4 <- function(con, header, ...) {
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Validation
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Get the version of the CEL header
    version <- .getCelHeaderVersion(header);
    if (version == "1") {
      # A CEL header in the Calvin CEL file format
      header <- .getCelHeaderV4(header);
    } else if (version == "3") {
      # A CEL header in the ASCII (v3) CEL file format
      stop("Failed to write CEL v4 header. Argument 'header' is a CEL v3 header, which is not supported: ", version);
    } else if (version == "4") {
      # A CEL header in the XBR (v4) CEL file format
      header <- .getCelHeaderV4(header);
    } else {
      stop("Failed to write CEL v4 header. Unsupported CEL header version: ", version);
    }

    # Number of columns and rows, and total number of cells.
    # The messages must use header$rows/header$cols; the local variables
    # 'rows' and 'cols' are not assigned until the writing step below.
    if (header$rows < 1)
      stop("Number of rows must be at least one: ", header$rows);
    if (header$cols < 1)
      stop("Number of columns must be at least one: ", header$cols);


    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Unwrap/wrap CEL header
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # First, try to unwrap CEL header in case it isn't.
    if (header$version == "4") {
      tryCatch({
        header <- .unwrapCelHeaderV4(header);
      }, error = function(ex) {
        # If this happens, we assume that the header was already unwrapped.
      });
    }

    # Then wrap it up again to make sure it has the right format.  This will
    # also override redundant fields.
    header <- .wrapCelHeaderV4(header);


    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Write to connection
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Magic number (always set to 64)
    writeInteger(con=con, 64);

    # Version number (always set to 4)
    writeInteger(con=con, 4);

    rows <- header$rows;
    cols <- header$cols;
    writeInteger(con=con, c(rows, cols, rows*cols));

    # "Header as defined in the HEADER section of the version 3 CEL
    #  files. The string contains TAG=VALUE separated by a space where
    #  the TAG names are defined in the version 3 HEADER section."
    # Note that Fusion SDK and hence readCelHeader() ignores the Algorithm
    # and Algorithm Parameters part of this header; instead it reads it
    # from the CEL header (below) and adds it to this when returned.
    # Thus, if you wish to update any of these two fields you need to update
    # the ones below (in addition to here?!?).
    writeString(con=con, header$header);

    # "The algorithm name used to create the CEL file".
    writeString(con=con, header$algorithm);

    # "The parameters used by the algorithm. The format is TAG:VALUE
    #  pairs separated by semi-colons or TAG=VALUE pairs separated
    #  by spaces."
    writeString(con=con, header$parameters);

    # "Cell margin used for computing the cells intensity value."
    writeInteger(con=con, header$cellmargin);

    # "Number of outlier cells."
    writeDWord(con=con, header$noutliers);

    # "Number of masked cells."
    writeDWord(con=con, header$nmasked);
  } # writeCelHeaderV4()


  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Validate arguments
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Argument 'con':
  if (!inherits(con, "connection"))
    stop("Argument 'con' must be a connection: ", class(con));

  # Argument 'outputVersion':
  outputVersion <- match.arg(outputVersion);

  if (outputVersion == "4") {
    writeCelHeaderV4(con=con, header=header, ...);
  } else {
    stop("Unsupported output version: ", outputVersion);
  }
} # writeCelHeader()


############################################################################
# HISTORY:
# 2007-08-16
# o Now writeCelHeader() can write CEL header given CCG v1 CEL headers,
#   in addition to v4 headers.
# o Added argument 'outputVersion' to writeCelHeader().  It still only
#   supports version 4, though.
# 2007-08-15
# o Created internal function writeCelHeaderV4().
# 2006-09-10
# o BUG FIX: A hard to find bug was that strings in the CEL header must
#   *not* be written with a null terminator.
# 2006-09-07
# o With help of all the private unwrap and wrap functions, it seems to
#   work now.
# 2006-09-03
# o Created.
############################################################################
# affxparser/R/writeTpmap.R0000644000175200017520000000342414516003651016421 0ustar00biocbuildbiocbuild
# Writes a list of sequence records to a new text TPMAP file.
#
# Arguments:
#  filename  - Path of the TPMAP file to create. An error is raised if the
#              file already exists.
#  bpmaplist - A list of sequence records. Each record must contain the
#              elements 'seqInfo' (itself holding 'groupname', 'version'
#              and 'name'), 'pmx', 'pmy', 'probeseq', 'startpos' and
#              'strand'. The elements 'mmx', 'mmy' and 'matchscore', and
#              'seqInfo$parameters', are also written if the record has
#              them. A record lacking a required element is skipped with
#              a note on standard output.
#  verbose   - If true, the name of each sequence is printed before it is
#              processed.
#
# Value:
#  NULL, invisibly.
writeTpmap <- function(filename, bpmaplist, verbose = 0) {
  # Writes one sequence record to the connection 'out' (opened below):
  # first the '#'-prefixed header lines, then one line per probe.
  writeSequence <- function(seq) {
    # One validation covers both the record and its 'seqInfo' element.
    # (A former second check of 'seqInfo' that called stop() was removed:
    # it repeated part of this condition and could never be reached.)
    if (length(setdiff(c("seqInfo", "pmx", "pmy", "probeseq", "startpos", "strand"),
                       names(seq))) != 0 ||
        length(setdiff(c("groupname", "version", "name"),
                       names(seq$seqInfo))) != 0) {
      cat(" ... skipping a sequence due to missing slots\n")
      return(NULL)
    }

    seqInfo <- seq$seqInfo

    # Header lines
    writeLines(paste("#seq_group_name", seqInfo$groupname), con = out, sep = "\n")
    writeLines(paste("#version", seqInfo$version), con = out, sep = "\n")
    if (!is.null(seqInfo$parameters)) {
      for (tag in names(seqInfo$parameters)) {
        writeLines(paste("#", tag, " ", seqInfo$parameters[tag], sep = ""),
                   con = out, sep = "\n")
      }
    }

    # One column per field, one row per probe; transposed so that write()
    # emits the fields of a single probe on each output line.
    hits <- t(do.call(cbind, c(seq[c("probeseq", "strand")],
                               list(groupname = rep(seqInfo$name, length(seq$pmx))),
                               seq[c("startpos", "pmx", "pmy", "mmx", "mmy", "matchscore")])))
    write(hits, file = out, ncolumns = nrow(hits), append = TRUE)
    return(NULL)
  } # writeSequence()

  if (file.exists(filename)) {
    stop("Could not write TPMAP file. File already exists: ", filename)
  }

  out <- file(filename, open = "w")
  on.exit(close(out), add = TRUE)

  for (i in seq_along(bpmaplist)) {
    if (verbose)
      cat(paste("Writing sequence", names(bpmaplist)[i], "\n"))
    writeSequence(bpmaplist[[i]])
  }

  invisible(NULL)
}
# affxparser/R/zzz.R0000644000175200017520000000014514516003651015117 0ustar00biocbuildbiocbuild
# Package unload hook: unloads the package's shared library.
.onUnload <- function (libpath) {
  ## covr: skip=1
  library.dynam.unload("affxparser", libpath)
}
# affxparser/cleanup0000755000175200017520000001151614516022540015315 0ustar00biocbuildbiocbuildrm -f src/affxparser.so\ src/000.init.o\ src/R_affx_cdf_parser.o\ src/R_affx_bpmap_parser.o\ src/R_affx_clf_pgf_parser.o\ src/R_affx_cel_parser.o\ src/R_affx_cdf_extras.o\ src/R_affx_chp_parser.o\ src/fusion_sdk/file/FileIO.o\ src/fusion_sdk/file/BPMAPFileWriter.o\ src/fusion_sdk/file/CELFileData.o\ src/fusion_sdk/file/CDFFileData.o\ src/fusion_sdk/file/BPMAPFileData.o\ src/fusion_sdk/file/CHPFileData.o\ src/fusion_sdk/file/FileWriter.o\ src/fusion_sdk/file/TsvFile/ClfFile.o\ src/fusion_sdk/file/TsvFile/PgfFile.o\ src/fusion_sdk/file/TsvFile/TsvFile.o\ src/fusion_sdk/util/MsgStream.o\ src/fusion_sdk/util/AffxByteArray.o\ src/fusion_sdk/util/Fs.o\ src/fusion_sdk/util/TableFile.o\ src/fusion_sdk/util/Verbose.o\ src/fusion_sdk/util/Err.o\ src/fusion_sdk/util/Convert.o\ src/fusion_sdk/util/RowFile.o\ src/fusion_sdk/util/AffxConv.o\ src/fusion_sdk/util/Util.o\ src/fusion_sdk/calvin_files/parsers/src/GenericDataHeaderReader.o\ src/fusion_sdk/calvin_files/parsers/src/CelFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/DataSetReader.o\ src/fusion_sdk/calvin_files/parsers/src/CDFFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/CHPQuantificationDetectionFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/CHPQuantificationFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/CHPFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/GenericFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/FileInput.o\
src/fusion_sdk/calvin_files/parsers/src/CHPMultiDataFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/DataGroupReader.o\ src/fusion_sdk/calvin_files/parsers/src/DataGroupHeaderReader.o\ src/fusion_sdk/calvin_files/parsers/src/CHPTilingFileReader.o\ src/fusion_sdk/calvin_files/parsers/src/FileHeaderReader.o\ src/fusion_sdk/calvin_files/parsers/src/DataSetHeaderReader.o\ src/fusion_sdk/calvin_files/parameter/src/ParameterNameValueType.o\ src/fusion_sdk/calvin_files/utils/src/AffymetrixGuid.o\ src/fusion_sdk/calvin_files/utils/src/StringUtils.o\ src/fusion_sdk/calvin_files/utils/src/DateTime.o\ src/fusion_sdk/calvin_files/utils/src/FileUtils.o\ src/fusion_sdk/calvin_files/utils/src/checksum.o\ src/fusion_sdk/calvin_files/fusion/src/FusionProbeSetResults.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPTilingData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCDFData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionBPMAPData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPGenericData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPMultiDataAccessor.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPQuantificationDetectionData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPQuantificationData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCDFQCProbeSetNames.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPMultiDataData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCHPLegacyData.o\ src/fusion_sdk/calvin_files/fusion/src/FusionCELData.o\ src/fusion_sdk/calvin_files/fusion/src/GCOSAdapter/GCOSCELDataAdapter.o\ src/fusion_sdk/calvin_files/fusion/src/GCOSAdapter/GCOSCHPDataAdapter.o\ src/fusion_sdk/calvin_files/fusion/src/CalvinAdapter/CalvinCHPDataAdapter.o\ src/fusion_sdk/calvin_files/fusion/src/CalvinAdapter/CalvinCELDataAdapter.o\ src/fusion_sdk/calvin_files/data/src/CHPUniversalEntry.o\ src/fusion_sdk/calvin_files/data/src/DataGroupHeader.o\ 
src/fusion_sdk/calvin_files/data/src/CHPExpressionEntry.o\ src/fusion_sdk/calvin_files/data/src/CDFQCProbeSetInformation.o\ src/fusion_sdk/calvin_files/data/src/CHPQuantificationData.o\ src/fusion_sdk/calvin_files/data/src/CDFProbeSetInformation.o\ src/fusion_sdk/calvin_files/data/src/CDFProbeGroupInformation.o\ src/fusion_sdk/calvin_files/data/src/ColumnInfo.o\ src/fusion_sdk/calvin_files/data/src/CHPMultiDataData.o\ src/fusion_sdk/calvin_files/data/src/CDFQCProbeInformation.o\ src/fusion_sdk/calvin_files/data/src/GenericDataHeader.o\ src/fusion_sdk/calvin_files/data/src/DataGroup.o\ src/fusion_sdk/calvin_files/data/src/CDFProbeInformation.o\ src/fusion_sdk/calvin_files/data/src/CELData.o\ src/fusion_sdk/calvin_files/data/src/CHPTilingData.o\ src/fusion_sdk/calvin_files/data/src/DataSet.o\ src/fusion_sdk/calvin_files/data/src/CDFData.o\ src/fusion_sdk/calvin_files/data/src/FileHeader.o\ src/fusion_sdk/calvin_files/data/src/DataSetHeader.o\ src/fusion_sdk/calvin_files/data/src/CHPQuantificationDetectionData.o\ src/fusion_sdk/calvin_files/data/src/CHPData.o\ src/fusion_sdk/calvin_files/data/src/CHPBackgroundZone.o\ src/fusion_sdk/calvin_files/data/src/GenericData.o\ src/fusion_sdk/calvin_files/data/src/CHPGenotypeEntry.o\ src/fusion_sdk/calvin_files/exception/src/ExceptionBase.o affxparser/inst/0000755000175200017520000000000014516022540014711 5ustar00biocbuildbiocbuildaffxparser/inst/WORDLIST0000644000175200017520000000104614516003651016106 0ustar00biocbuildbiocbuildantisense AppVeyor Benchmarking Beroukhim bijective Bpmap BPMAP Bs calvin CCG cdf cel CEL cellmargin CELs chiptype CHP CLF CMD codebase codename CustomSeq dat dChip FHCRC GC GCOS GeneArray gtype Hu indexpos Kasper LaFramboise lnk macOS matchscore Meyerson MMA MMB MMcAF MMcAR MMcBF MMcBR MMoAF MMoAR MMoBF MMoBR MMs Mth nd nucleotides NxK PGF PLASQ PLoS PMA PMB PMcAF PMcAR PMcBF PMcBR PMoAF PMoAR PMoBF PMoBR pq pre Pre probelength probeset probesets resequencing Rtools SDK subgrids th TPMAP unrotate 
unrotated WEHI WR Xcode XDA xrange yrange Zhao affxparser/inst/extras/0000755000175200017520000000000014516003651016221 5ustar00biocbuildbiocbuildaffxparser/inst/extras/easternEgg.gz0000644000175200017520000006171714516003651020663 0ustar00biocbuildbiocbuild‹ ¼½‡C[ÙµïoªèH „@ z¯¢wˆŽ+Ƹá†+ØÆÆÞA½÷^½›î:-™¼äÝÜ÷Þ½¹ùÝÿæ·ö9ÆÏL2É6ª`!}Î*ßµÎÞ›'~óáêú•']<~ÛáåH$’È¡ÁDRpp F0™L ‚'ý½}¼±ááéåéEpüWì-¼/4¼Ñ•çϼº»«Û 7wü®»ûçßûípqùâ±+_}gžžø®<%1_"`"™B"SBÈä2‰H ðEÃÛÛǃàí}DÌÃÝ #FðôÄ‘ƒ_íùb..*W7W77÷.±O¶†£sý¹Oíéé…qö?}ç?;Þ8±`r0ˆ¬ð02™F &€ýùùù¹{ù`Ä„O¿=>"Fðô À›À8~FÌÍ Ž4†ØÕÝå„Ë?Ÿ˜‹«‹+77W8@_%扷×gƒ€¿•Xfc! †®Âi¡”ð°PJòMb` NÌ lÊñ¢žÈ¦>#†`aÄpzdž›3’¸ÿÓˆ¹‚û¹`îz8¼ß Ž“ûWx…gDù4ÐüE[#øùú¡ÈF EĨ´° cPP0"æ#xùàèéˆ÷J€…sôÄŒükÄP sÿ'JG¯~üõÝÝ¿þ°¼ñ‹ ÔGÄŽ"22ϯX;ÁÇßÏ/0($,<<<ŒN…A£EÐéôðpTr08k`€Áƒ/_¸ƒ°ÀQòôú‚˜»§ã·¼>#æòÉ´Ü]{b.®¿*l{:y!NŽË8oÌCÐ@‰ ww~°ÿ…áóôöôó ð¥ÁÓ¢ÑÂ#c#£¢¢è´p ØX(˜Ñ—Ä|€•·/Bæ…9³70;:8@ÓÓÃIÌÉr`æê„çöÛsÿ;p!>è#á'9o9o‚ÓwQjðøäµžþþ~þ€!€`…Ó"À¾b£¢bbÀä€%<<44Ø@ª€ Càà5ÞGV øÍ^1W7@ä¹bè&ˆJÇxÅÄ$ÅÄ&&%¦¤¤$ãÌ€Z 我ù-"" òñòõs¾.&w½¼ ®î®ŸG~̰ TpA¢Üå&~"Ïÿ.…ïâö«ØgÀ Ò:d??Ð è‚}>ŒßO:ì„—N‹‹OHŽKJMKMIIX‰ ñq ñ±ôˆ°@¿`oxÑ€à  z}ôKPÐô1‡î£×÷vÞ÷ω9\ÑõŸ UÝÿ&`žÎ¤þÉ!Žàç#sŸß0?ôïh'çí–›Ÿ˜”‘™žŸ–„¬ ™ZRBB|LdÑVÁ$@†iZüW€áá¿Üõ1ÏÏÞ«èVW‡ýÖÄܽGbÀüý2Äü awO¢Ðýiâ’#¢“RRSÒ²2ÒÓÀ)Óá—”œ’’–$ã"ˆÞþÁ^þ$HÓ‘É$ç‹án WŽW³ó:VOøxƒ‘á" jJ¬,Fe ^ ÿ‹bØ'Ĥ¿ß1NÄϯ~nø£¨ä´´„„ÔTðÄÔô´ÂRÙéɉIñ)ééðLRRj4Ñ—H Á?¨¥ÈA¡áˆTPèµ>ÝóÇß1#óBÄP¼Dµ< êƒð[é1—¯)WÂOûž„/"˜`À1Z˜Ç`oŸDÄ®ˆŽ'H_Ú ƒ™_XVjbVzzFFzFZzjjFZZVNFÜffffd §c‰¾$j9ŒB !‡P)Á¡2^BÙ‘ÈèöêØÛñG6æéî¹®àW`2Þ¹'\£RÁYüƒµäO›7X ¯lŸó:ÂŽ’Œ€¸ uº&aÿ‚ðÇ?ñ[løÓÒâ²Y™ ÊD À•›—Ÿ“•…=ìI¡‡S##h± ‡Ú¨† ¥2ì°Qˆ &àpøCDðõöð›óù$¦±òÔÃQ®ý–.IðrvnÐ  'Ïc‚# Ã>fXD2Š3ÁT0Öà v|´` öÃÇ|6€”˜‘‘ÅÈÎÉÎÊDYŒÌ¬¬ì¢‚¼‚ü|øÊÎÊÉŠ Œˆ¦Òd‹¥‡†SC©aa¡á`qT* £†á]5J•‚ïÕ|dPdxÈ €x#x¼?¹æ?”&ݾÛfà"wI܉Dà„» 9Øyô?=ĺ_ا FàˆÇ£)1?#+¯0¿ ¬¤07?¯¨¼@•”–”ä§ÅPƒ¢cÃb @)@£G…ÑÂÀبá´pÔë Ñ(äаÐP¨âÃ)¨I¡Iþð/Àß,Pê÷t8†§§×?Jì‹æùK¸¼pCmfàåàCFa%$„ü“&äˆ&øèQz ¤1R2‹ ‹J«Ê‹‹ŠÊ˜UÌòÒòòò²òòlä'ņÆ&„ǧ¥&'$B@hQÑQt( ÂÑWxkdDD ÌB©€‘JÆ~?8DŠ› è°÷íCpÖŸ/®/kI¼dþTzê|yç€f"Þ—&(0“áxCÕó墆Q0œ¸Éa }ø$¤è¤„ÔŒœò²šòfu]]-‹Å¬ªf–W2+ò ™•ÙII‘d$sÓ u¦¦&ÇÅÇD!½E§GÅ@%†™Ydh8Î1Œ‡1‡QGp¨@gíï%ææöeÿù®¯ GŽ px$¼GŽ/êpÑhaá´/F8ކNó 
>Jvñ•Y¹y•§¢²²¦¾¡®–YS[UR\]UXTÍ*KKN¢¤¤Q¢3°ä Ø@ï&CQ‹ª¨¨èxŒ[4Ø¥‘Ñt:=5B(Xs-„Œ ÒoFì'ªYØÿÄËQ"â" ’Š_CH(Ädjx8- ëÚüd„…¡Ž!êH÷Ùà Ð¼Úú*f]9³¦ªŠY]YÕt¦¡²’YÛÀªdÕ––±ê A`D¥ePb²òóPjÈDª-‡*ʨø¸è8PrHÎ%Cý å<=2!ƒ¼‚b'ÊDD§bû‰}¡*_„v¼÷àÈÏè)‡G:T+˜ é (¡`XˆTø>PpÁïÐ#"hè8 ‚GÅcrdzBNSsccSCCC]}}-«¶¦ñd}UuéÆú¦“µU 'kò²²³#Y”¸‚BHùyY¹i鹌4(R““’Ó3Ó¡XH±›˜œ„0ÆFÅÄFGG„CF ÑBqƒvCýËÏû¢ÝŽ?uT‘r ` †ˆ"i‘pt£¢"ñVıO£5!Éaýð”ÂÆsgNŸ:uòä©SÍÍÍ hÔÕ<ÓØrîL}íéÓùyù©±ù¹¡ …åee%ÅÅ%¹9ÙÙy¹ pÒtˆméÙ999 „” ©ñq1‰ñ æèphÂáíP1½‹„!N “f7±¯¨°cæÉ¯›áÚÛÝõC)ÛÏ‘åˆx#1ä…X$=è€@­ˆÏF,<‡† …£h‡œ˜–\wþüù¶ó­­çΞ9—3§š[Nž¾Ø~²õÒÅ .·£,šB+*!ÇV@­®e–—#sËÍÍÉõ–™•_ˆ=ÌÏIOMŠƒEÇ _L,¤ÔðPmËþqb.Ÿìð$x|Ö¿ñGŠ» yîîáÑ 'F$a •LAˆÑ!x«/I}F-õïá!Ï¥a ‚šzªýòå‹W.µ]h»pµózç­Ëç[Û®v\:súüµ7®5•–•••gÇ–—‘cÊP­=U ÄG H‘‚‚¼ü¼¼Ü‚ÒÒRxX˜‘ªò@v–d-&ŽŽA"…P†õ¡ü‘Iü}Ä>ù(EzñB-;¼øû¬®ñó?ª±¼B(X@-Á¸/iÅÿ„Y4ä·hÌÖ"qk‹ˆJ/i!>ôyBB2vFü1|Î@KEAJMŽ›Wr¾ëaç­[7n\¿ýôiÏ£ÇÝ?¼{ÿa÷ã;-¬Úúºº†Ü¸ÚZZãùÖsgΟ?ÝÔØØX_[Ã*.«¬b2+ËJKkjkj«s sÁSssÂH@m\øÝ‘ Õ(Á¨ä$ ÁOø»ˆ}Ùkýñ¤<®”‰ŸÈA=ZäšOBA}Ð_!º"`€ FBBRJ"d±ä¤¤äø‡FR|D/>ÞÑŒŽŠEWQ±ÑQÑ™eµ7ž tß¹ûðÉ£ÞgO_¾xöüYoï³gOŸõ^kjhlnn:Y’Úz2"»©ýÒÅö òÎB¦hlª-),+gVWƒ²êšá'Ë kaÔ0ËËK‹ ²å0r ß’ãbââè i ©£Ö·/~Rð×sý9`x"ôwjyŒ6àö…ZTØùëPÈ‘H{A¼Š‰†7…º¨‰€+9 ’~:jm¥ÃMjã€XJIKNN…ÞpÌU± ›RÁd6\{ÞÓý¨otxôÅãþÁ~4úúú_^<}îìé3g[™ŒöÖØ‚SWn\»zåÒe„íjÛéæÊ’’²ú¦¦zÀTwêÌéðYȳÕP+TV”•æç lš’˜U•‚Ä u·½~51×/gM8*F§’Ç•ØV-"H±öƒC¬CzÄÔ(ð‚p‡>:ðJLNNIËÀ e`—L|d ¹„D&¨LPñÈWœg>â2«XÌJÖõçÏ^MOO¿ìÅÆÈèXß•6 ÓÖv©<¾­£°êì¥[·:o^»yíÊ¥ŽÛWÛÛN×0«[ª–ææöö ­çÁkO5³X5Ì (ä !22R’’ci gBàCý]Ä\¿6k‰T‡’G&…BýN,¼c3n%!ˆÏT)Bø‚4DÆ"8#˜HïŒ '%ÈúY™G#˜aK®‰ˆ. KJM/®­¯­kìz50Ãfóg†§¦§Ùlö \Ï<¹ŠÆ•+W :ïÕTž¾{çæƒûwîÜ…˜w½£ãêå Órölëùsç®\½|ùÂ…Km4t¬š*04H¦¹Y™ Ii qôHÐ!$: õëˆ}ò:Ð蜟£MtÆu$NÑÀ$:¿å.*x°Çò#,%))-5#óØÈÊd0²ŽCí-4TØ ¯´ÜZˆH Íwúú‡ÆÆ¸“c¼Éi‘H p9ÓAlt‚UÝn)}Ø]SuöÁ­wu?xðàþ½;·ožnjn9Óp¹é…+7;.^l?w±ýü9dtõuÕUà›Eyy9`ô)‰¦ÃÃCAÈB¾üUqì¸È?6Íù#vÖYXÐñÊTHͰðÐp¬#€] ØG`í= >Ø hï#,Ù0 ìæäåCMÃ`d†Ð`¤¦—TŸ;Õ¡ýTû‹É©©‰±± ¡`R SHxì©ñ¾®Û·oß¹{÷NWSæƒû¥¥­½Oz_½xÖû¤çq÷£‡@ífë©Ó'›Û®\¿våòÍ®›7:._¼z­ƒvú$Ä·šªrgyyÙéiIñ±1´°P4# Õ•¿‚˜ûOöŽæïÍnÃu”0X’‰†µ"Ð5^êDÒA(b‚’_|BRrê®,(Wò Pó4CwòQ ÃÀÉ1ÔPœOÍj<‹´þ…³çz8B•ZÁc‹õj¥V­•MÏp§^<è†ÑÕÕý°­ìÉËK'¯ =é~ñ’鳞‡ÝÝw.·µžm¿ÙÕu·óþ“[ é.]¿}ùÊ•K—.ž? 
é´Å¬(/j 3#-1&22ÔÀðþÛ‰¹:'5~²/\Q@‚ô$’ðVi(ÕÑpÀ(t¼&DQ +2Êö1˜òJL)‘޹## ¼\Ô4Pè #öiäCõ—“~îÈÚò›ëÚÛ.ܼ}{F*Q(•R™Åf³˜,Z¹Ò¨å¼zÜÓÓó´çÉÓÞ‹5}œþû÷Æ'‡Gù’ç}JA…tõv]½ríÁÃîçÏîv¿º³óÖÍ;]w:o\ë¸zéÂùs'›X•U%…Ù™`dqÑ1±c!ìȾ°xïïç‹é ˆ_رº›Á…PAd@Ñ1`R±X•ãÔª`\¿!áá+ Ô£“”RQFØCìɼ\|ä1pn§Û®\º=#ç(e2©Â:g·©-svûììœÝ0õòŒç/ ‹^lÏ<2Éå°Ù"ÅðË‘ÑÑ¡þ—¯^ÝïîííyüäÉÃÞgOŸvƒê}tÿö­kÈA[Ïžni¬ª©./)ÌËJKNˆ# ù5Ä\\œèðsÚwôAÌ“ò$d^¬eƒ¬ 9âæýIBaÑ ²c"ŠßéŽì˜q+ÏA«°¸¨¸¸ øBE2F žF¾ŠJAx”—›£èlÇÕk¯TjF©T(mëË+V©eyqqqyqN?Ñ×ÿêÕ«a6{¼¿w@,ã ¼âŠB¡@À™ž™š~ñjàÕ‹Ïžõ‚*èéîyÞûäq7Ê 7;.··ži®b¥EÙé©)ñÑQá” âßLì(‚y9fx£™ ì¦î±™•hJ% ë˜*ÇñÄA´ŠÇA!V‰ètuR2 ÝŽè••“ŸŸ›Á)*))--†ÒªeüôFI F±àâæ†À¢»%®w\{©6èѰ®­Ì®¯Ú×776VíVùðŒa®FÁ•ë”SýB…L"‘IÄ¡€Ë211>>:<06Úß÷êåàèðó‡Ïú_=ô¸§ûÁÎ[×®\8w¶®¾¡º²¬0Ÿ‘ž„ÊMj%5bÿ&bGš~îÎ!¿@äI¸  P±ª:+þ€Mb2„õ$H…‰På$be6P¢ƒì˜TWfV63/dZˆbåeå¨ù€ºZ%……Epqx)º-(®?sûÖ}®ÅÃ`X>ØZßÞ;ØÝÙÝÝ]MŽO²…|•†=>"ÐëM‚A±V¥Tò9 >[(‘ˆ<‡Ãžâq$òÉÁ¡Á¾WÓÃÏzŸ?ÒÕõøAÇÅó­íΜ: Ei}UI~~zB =t?vbÎ÷s¥‹«›s»£¥ƒ:‘ÎÓÿX1DvÄûˆ¨ªãð6¤f2ÒN gRÒŒ‚¨…—––Ò¼³sr1_Ë/WDÀ*P/Æ9à.z ¨´¤̯° ¨¨°|)ò¢Òö›wŸŠfmVËÊááÁáÁÁÁ›Ã7»³œñ©ÉéI¶J/TE/JM†±n‰A§UOJ&ú9b‘Ø  |¾P«U²'FG†‡f8S“㣀îÅÓî{ׯ^:®íìÉæFfqiq|z8•aìoPG ?ëË/ÈŽ~Ggt–/ Ÿ°‹B; êI©ˆR:²%tIÏHÃo0m‰'ÇL⸋.ðÁ²ò¯ Ð·¢ÒRÔ£b¥%eÝ/µ‹óvˆõvëììÖúÊÛoß¾ÿqÃ(bs¸\_#ã \vÏe‘U5úXj5 ZîøÄ”BÆ•X5r‰D,éÔbwfjbdL,ò¹3“cCýý¯ž=y%BÇÅs§šYUÕy c#ia”Ge‰Íÿåi‡hÂ;Ö 8šƒÎòƒ¶§ ® "=Æ C*†Lü ?ÉE­l”s€V^¾#(!^GV…*¯ø‚6ÊLËÊÊË*®÷JWææ¬6ËòüÖÇo>~óÍ÷oÍ`<<>O ^Òsy<ði§jqnN«·ÍÚlv£X®Õèõ:½Z¥TÈ!ÇŠµJHÖ ¹D$äÎLŒô½xÚÓuÿîõ«[[±ÔT J¡ÀVœFö3ŠÂÕᎼ&òöõqö¿ˆDG[0ÕwÅ€Ì)ÃÔ#‡qtâ|~n&° TTÀ*>áª`VVT2Œ¾°¶#|ååÌÖ'¢ÅÈK‹Ë«K+Kkßþðý?üî‡×ZH(„/í’Q(йƒS6ø±¥…¹y$=ìv­Ú æflZ%0³jÄxT“ªUR©‚ {ÕÛýà^çËÎ5Ö±* 2ÓÓ¢h¡¡” ??Ç¢¯°£¢hyârÂ÷¨ý…—CîAEFGÅÆ`"+%ÕÑuÈ‚z''‰,§Åæ1•_X ©Rã‘…¡SŽUÇÆ­²²âk¾Êl»;¾¼ce.kËëþøûÿðã].ƒ±ˆ$2R"‘H…÷¯™6×V–—–ÁÖæ­æ9pb›Éh„,«Sé¬jdiB>\X&’Šx3#€ìyO×½Û×®œoi¬g¢t™šFbÞ_cÎÉŽi:^øÉÆÇd ~Þ¥GtNxÅ¢æ).Œ¼\œžßZ+“UŽtW„b’Sª¬d2XU;1+0p••_[eyÕͧú­õõõµõõ×oÿøç?ýÛ+ ùå2©T"%!7©”šL½M¸<·¼º¼ ¦¶¨1,(æY›Åj†$«7tPÀÀXÀ“(8|™ˆÏ™ìþäQ׫WÚÏŸiª®(/b¤&Æ!b?OìȺ@R üèNaŒ„ñ ÁV´Ð þ‰F’¯¨ñrWðÜpE€ä’ð =’« h••¯J­jp]×p¯¦ {º±;-³¢±—·µ¹¹µ¹õzë5ˆ°×ÿ§?îåJ %¡TÈôË󋪧íFññ À]·Î®®ÚZµBc]˜C³šÍˆiZ• ‰5…A§*eb!wz|¨ïÙÓGw/_»xþ\s5ˆ¥'ÆGG†…H?CÌËŽu:^H}ù@µí© _h>IÎ 
*Ÿ¸Ø8HÉx{++÷Ä"È€…˜*,BÞWú¡“U¥åE¥åxŒªp&¦ÃºTMM "Æb±à.z„ždV}9˜•öÍÝ­-Ð`;[»ÿÝ»¾]V+ ¤+•*4ŒÛ»›ÊÁþYÎæöÖÚºmqssÓªŸ[^Z\XšŸ›³Y-³+6ãܼ1“Š˜Á‰yì©áÁ¾ç Ì:¯´·¶Ô³Ê‹™© ±@,ˆè$æúµMft6ðü‰Žn!vÎE{lRÖs^X…˜Õ ²/P£… À’ ‹Ë!T•——"ñ€Ù”3Ê;p‡9(`pj€U->X5Õ,ŒX ²¸/ˆÕ îìÃï?þîÿþço×ÍZH‚J• 4í¦AzVÒ)5/o//½ÞÞÚ\Y\O^À†âšýõ»…ù5« O­³ØŒzZ&°'G_>{tívG{ë©æÚŠ(.céˆX /NÌå§g"=±Õ;(Úøù½/ Ôj FGÅDcùÑ™³³1õŽô{YYñQЩüL’~.´*˜å°jV5F¬öø`©áÄÐ÷pŸu"»$ÛÙßÝÛÝCcïð›·ß¾û_ÿþaV “Vo]YZX_ÛZÞ|ÿ~Ïny±<7f°n¾ÝÝÙßßÛÛž_ÚÛG¥ÁîöæÆÚæÞîÚÚÁ¬ÍlÐêçív3„6…Tȃª¡ÿÅ“»w®]l;ÓRËDÄ’Äü¾BÌÙ¢ÀŠ!‘©ÈŠ,úº2…hĬÂXu]Y X-«öóQǪÁŸcÕ5°>[]OÕ?ÝûþÛß}ûñÛï¿ûö›o¿ûöÃû÷ß|¸ˆ‚ùüîá‡[Û¯7_ÏK$uúA‘eykiÉh´iTB±Áb5YŒZ3ø r|ºûò^Ç¥Ž|ûòÒ¼ÅlÇ”€.ƒPÖõàVÇå¶“µÌ²¢ìt FCÄüBÌé’îýÐd= à FMç0Ô„`wP°piS¿PSRrÜ’˜5¡Á,0³ªªrà,2$§ª+*ªª @i¨«Í L/¬¬­¯ÃIÁWäõêÆ†Êê*VMEMe}«Ü”U_W_Î,)¼®ÓZæ-Z›U¨Ts…­˜Ëå±ÙOn?|òèÉ“O/ß|róÒõöº‹•i©q FV^]q9« ÄMviNaFjvNJNrbQ”¾QÔØP’Ÿ_ùŒÞfT) È@bŒƒŽíêºsýJkK³4?+ˆER¾FÌ Ò£ŸSz.x«ì ëã ¹ Õ"ï³s‘‚@áËQìTÖÔ°Õ•çûg 7WTæWU3r@R”ÔÖÖ—ÕÖ70ssÊ r+³*+ƒÅgJãòSKëššëóêN——†%ä2¨qôøÂˆ¬ø²ôHʉEù•Åñ ŠOXzBUnFvƒE ‹ŠÎHÏMd„ø‡ÇÅÅú‰ðÞýƒ >žÜ!0{’I'Ü  È]èAnÎëê"êèƒ' ¬VT­Ñ©3Sã#}Oz>¸wïæ…–SÍ•EŒŒTdc¡GÄÜ\¾hì0Ǭ8Ly…„PÃÃPjGt‚1*mÔßÂKŬ¬ï‡2b)Äñ¤²RbQ,40Ø#ŸÝÄ, ŒI óñ¡RÈáÙIéÑŒäôX_/J1Ü›îFi¾×^ž_Z—ö”F tƒzÄàæêE $“ý¢ À†3H$º7Á•àAvs rñÆÞ»«—1,Þå„››§§cyܸyBÜÑš7²K`€‚ÿŠV`»‚@wA]lA|ȱïn¯­Øõ³N­ógFûž=¾óbëù–:fq>#%>ŠF%£ö…7áxªD&æé Àp^A¨MHAóÝPgµècPgJíÔ4¬ÝžÉÈÎÃxa¢„YU‘E«,!`KÃÑrNŸš %5)ž'\ îøjhw/ø–—·«›+úTðÜÝNxefFÓ<\šX$OOÐ6´,½ThÞžðØ=38ÇèGŠ'ƒãˆdoo*™@K÷%z’ÂdŠWLŠ«d€·7%È‹D twq ÏL ¥ÃB‚‰¤ð$FBp‰š›”B  ÇR=#ãB(Dÿ˜ØHdÍÖ½ý×›VݬÕdЀÄûê郻·®µµ4T—ä ÕF&¡Ðœ擘ÃT`mBl­g$¶À, ^À+=Íqê'“ð¨6,)­®iL "¸xÇûƒ+xù9^“~…šVMu%€ÆÖäÂáðñ ¤ Õ4ü}\<¢È.'ü¼Ñ’²ØšÊ¸ð0J<1*˜äE £Åg§–†Çgf„äÇæ„çRŠJR£‹r«K«²)ŒœŠÊÂü”¸ªÜÜò‚ÚÚÜ’ô¨²Ü0FV45§œžÕäÆ<{¦žQØœ–PQÛx:¾¢º¶¹¹…UÍÈ .hÊ ÄGçäTWæEE¦Þ²îooRƒ J‹Iÿ!D¬óÚ…“@¬0'#9>ŠŠ'KZ®tì.h0ÂHÀ‹Š”W$êØ£™]Hy¥¤¤¤¦áÅc6VU#=Q\\VÙpµr8U—‰ÉñÉ))Éø+z¦> ʨðKÏJÊ×-+.ÎMÏÈ/-ÍOKM+,*`dE5¤yz»ºx»p  À‡.­ ɬJðJ) &—2ÂóËÈÅõyþ™…q”¼J è¢Ì¸ò²š²À¤šÚŠoÊ™òè<”d*rŠsI™yÔ€Ò2JRSrpqUE³"$³!!ª¢&žZZÁ(aV—BÌÍ%fÒà óèÞA”´2™Fó/}®_½¹º`·5r‰€ ÄžtCqyálcMYa."ŒË‹£0†1ÔC>DÆO£Ñcs#Ðü”±WNNN.' 
KJAšVžë™áñŸfúRãÃÝNœ Äe܈ÍNr - Wõ°9|¥€ûüêõöèd÷ƒ.®h(£°‚Å$‡¦Ü’ äø…EB2RHE¥AnQÉîþdoˆPîÔH’‹ÑÓ‹ìäáîáíáïàçvÂ# Ø×Å5(*Ð Ÿ%‰æÛ»ùúºü==Iä@??o¸ëççå?à-ŸóƒgÝ}}<¼C.- :xû¸Ç\ŸQ-l®..Ìšôj¹ˆ; ÑÿɃ»@¬‰UQ”Ä¢#¨bG^éꔨ÷â ¢W=o«"]ïõ™Œ<Ã>„;ÁÕ=,;ŸüÛ/ý¹An¸®Z_ÝX_ž·™ Z™€39ø´÷á½[—Î7±*±dùSb.®Ø©ßT?R¨á &b°åhÚ .$²rñs°ù…X_«¬ÂÑäʘںêjVÒŸdVsmR ‹G\Þ݈¼Jýéê‡}E?w˜÷òÎýû÷ï>˜çˆÆ*Î>¸}‘އøƒ­¸&eðcSJòð‹`–z¹ü«ˆ^›P­n­o®[t@lzøÅ3 vù|K}MENŒLúœ˜‡—Ÿ¯—)”. À¢Q[)¯´ |ºM6N k–~ª„œµ4‹™”T„:I˜¨¿‹Á™ jS¦Gh¾WXGßÕ+]9!>èTºw@(d•èDšØç Z# +×´8wBd¤OXæ€-ÝÅ%¦<ÃóŸ´?Ž·¿Ÿ¯Ÿ¿Û±ç¨=†•­µ»Ù “ƒðë{Ùó ³£íL"òâKbàˆ˜?•J £ÑQrLHASß°œ¼ÜÜ|¬×…{‡3VVTâÓšúFFˆ·Ë Olê0@ Mb„»ÇžìI&7ÑÝètÚ%©Õ¼°Ôwª¡©©¦¤¸ ªÂ†«­U…åñq~ž@Ì?Àí„OçØù!­äI„Ë fbn䰰Șȯ™›khvFfVvø×¨¤çåäæå&†ý´ÅìÁhk¿páÂé,kñR&\‚ò~kmy i Ɇ^õ>¼ÓÑv® KOü)1wOoo"%ªôÐðÈhð“Ó1gdäâ½yt"£¨¬ì¨t¬D¥4„¯úÆÚ¼Tâñc˜ÍÌ%{¸Çä?ªËh¤ûfÄ¿xÿoçf‹ReÐÁÐ.Þ:=t±€è™{2=8Àå„wxi8PÉxbøöÿÓÜ?Åe¿KW®Üºwáköæš~óÒ™S'“¾FŒV²©©¹!?ö§ÿÑ=ýäéÓÍ-'óbΧÊåvûüêÒÂê‚ ©~ãÏŸ"bí­ˆXvR<ÝAŒpœRh¾W …ü$üÌ5#o«¢FNéQŸÅ`Ba]ÛP_–Cós?¥Ý# @ç»ÓYm¹õe¤€„¬Í?þîßþðÝßאַ¯[íËËKËKK»­­…~îE§Ã㣠‚‰I#Bðï]¾Ýy¡©,Éyü½ @Ìä$» ‹O8•èïÿ5˜ ¢‘–ðñòpùÊÎqø:SÏOß«·n¾^_YZ\Ó©åbH•£C¯ž±k—ZOÖ¢8–è$æ‰Ã6v@›Ùø#%åc ž³qo,B¸Žz\€ \‘UËbåGS‰îÇë»xø{x—·äyŸ€x–ØâJ¢|÷‡ï·8ª6i_ö>¼{ï^÷ðóá› éiéù¾®‘t÷$ 悪*jmÿ+Ð#(½¨÷A”ËÑ‚1¨º\epùuûÔ®|üæýîÖêÚú’Ý¢×*ˆxùø~çÕö3Ì’üŒ”ø*r¥¯Ã+‘BÄ|д¦Pød|XX#­U)DçaËŽŸA„’»–UU™äû…JrÉìÏv!‘}á^lc}&µ%ÝÕ—à–2&ëyv/“B£°MU|‚¨dÿPÿð__‚{°¦¼<(¯D¡]&\<è…Iÿ²D £@ùþã›Ý¥ÕÍõå9»U¯AÅ‘—O»:;.žm¬*/ÈNMˆ £1OŒ˜+nc>¾H¼bÄ’RÒ3Ø<¥LzUkÖÖ×Wf†xÿôCù?ûCà *I†ôñ ÁQµ^‰AQ7×LVí`3䊢Ü× nyéËMOŠŽ€RÜ™*ˆ‘bb£bhˆX"ËEQQY©Ã««X¬TZ˜ß/Da·øÛw+H'\SºO'¡žI´BËoÞüðŸý÷ï8TŽ OLråã=ÝéÅ¥Þ'Q>ÇõKP°Û?—=‡ïîJü”{“} žn¸`ð‡ZÕ#{Œ?92Æ™·||ûæ¥ÓuU¥Nbh)*_]=Ü<ÑüRdêOG%&&§e@QTPˆN£•3QƒžYUH#~1¹D¦gÍq=áW}¹7Ü“ìîF ή®­­½Gìîù¦²È°Ð¸ŒìŠØ¨h/Zˆ«GZšç9øê EVV“A ô?FÌÛ×Ç›D‰ˆ¢FÐóX¥éÙµÕeEù…,ff‚¿¯¯—ûÏ0—Ï’€ã„ƒoåù®¢†"¯£©{®’/~,i‰NŸðLk þ‹‰ý´ør·_—>g¦GÆx¨IvÿÖ•sˆ#562œäçãèáÄ|}È4zB5"*&¯±!/«µëfGëé[L·ÿá3¹ÐO50Pm:åKŽJ$¸(QuÏÏšë#âŒTZLR:#3%5>ž]”—™FKÉI(yx7!2=#:ÊË´Ìôl#7­ªÉLÏLÊÈÍÌÌÉeT–ä¢MS à@£5}y¥¨ÈÉ-`dç2rrs2ÒÑâå¼ü¼²&Z?QTÌÈ-«)+b2KP糸¼8¯°(¿¬¸²¨¨ªºYSQTÓPQRZ]]RY^„Θ2™¥9eeùYe%Ùyy…ùù FVvãÐÄgz’ë0béñ‘!Ÿóó÷§Ðb¢bÓ²2+ššŠm/»ºÞ»™†ï’ùÕ­ŸÝÝÝbž û^Þ¿wÿ~÷ýûw½|15QUÿäþóåÖÁ »O>8ÁÌ 
ŒNrxµJÅ|õj”?Ý7<Æ{0,œào­>zÎåL ÜŒM±¹\¾;2Ãy195=Ìšž*ØrŸÇ~ÞÇás¦…¡p ®îøä8¼6or@ÈçðEb¡L+r^NÈxSB©BªVË0ÄJîK®|’-Qòø*õªl%3-Q(…2öK¡T*•ˆ¸ãö80êìèï{ÖÛÓóðÙÈèØÌÈÀó‡÷n\„:©‹ uss LÊÎËʯi:Ùpy\<>0®ÔŽ]o¾ÝÕÞv±£½ýbûE´»­í< ¸9wæÌé ]ÏMðÙS"ƒÂb·ÏÍ®¯‹íV3ßò͇oÖÖß yns6ëœ}vnuyu~vvî¢1§ã¨í³ÖC“ivN®ä+¹£J½Z¤¶›€Ñ°DÏžÔòx|µÆnS „J¡–ÍçÎUb•N¬ÎÍUZµFͳ(¹J@+—„Šy½D!Ÿ3Oñ• ™Ld³åf;šæoWŽª—d±\.µØF›Í¤›‹„*£Á ‘h5•BŽf‘ y\{r|llx°ïUoÏÐè(gbøÅÃû·.žj¨*bôŠ£kíî F홚–’ZÐÐXÌš– p4îS©väå´œ£ÐèÔJR"SÊF‡‡ør…BÁ›‘Ëù25ÿÕ“’'P˜M‹“Õb‘h·¾Y]Ü·ÍZµ:ÉŒV«ÓêFƒÉ¨×itz½Éj5hLv»yãã’Ù:+óåcÃlµš/š5LIFG&Õ£/eÉ”ÖhµiE\ G5-žäidW>­ŸÕpUR©‚§œ` e\ ›­‘ˆæ´<à8+áÈå* Ï<«™ÖÙl‰ÁfœRÎ+G9Ób¾Éb4š,: [*eKÑüÎ1žñ•ŠE>Þ™™~õâÅ«‰±)(”úžvݺÔÚˆˆ%ƆýÐvGŸ‹‰OÉ€XRÙX•SrýlK+«ªñvï6ÿñí/|PvÃÃÓ¼¡ÁáÑq±`|d’/UŠ¥ý2«^ÌåÌn­ÛW­FãÁþÇå}«Õ®‘IeR…Á:·¶bÅN¡jŒÛâüòœÙb1m~¿b¶ZM&¥Á`éM©É œ3iEz%Ga0pUØlj¹Öhàñg«Q¦3i:›A¥WËT"‰@nÑŠä2Äb²Ùô&­vV%Ñ©Ä ¥ÉfmV‹ŽO5§žä “Ùd2™:…N¯Öéu:=+`2‰“ØôøÈøô$oøÙÀ ‡ÍçOõ?íî¼Ôö‰X€$K·Ï‰%gä–Õ46µ´ž®:Ù ¶‚ÙxõbmӥΫW;ïÞ¼÷jd¨```p\~|ÝžìÖÚŒ:Ϋ¾Ûæ»í×û‹Ûö›wowö–,[ïÞýáÿüáwÿöç?ÿßÿüÏüÝwûoö÷ö?ìÏίëLï1«Õl‹Ùb2/mìoØÍf3þŒÅŠÏ@GßDOZÌØ¡oX°g,6tk„§g7¶—°Ÿ°XÌ&ôÿ°ÿ W6­aÖ¤7™FÌd2€?Â@=:¥BަKÄB>FŒ'æK,6­lrd†/ž@…F¬…‰¦ÝÅE}‘y8ã)„Ä Jkê›O·]n­;ÝXZRQVN{Tמi¿ÔyçAϵŽgÃCƒØÀp´‘‰A®Æ¤îÙæÍ‰}ËjYÚóãòÊ®ycïÛ¿þ÷ÿ×üå?ÿjó?~údD²ññ›å•=5wÿh0l 6óÆöºei{Áú+0²Áÿ^Ú\2™Ïÿ¾ÉlƒÇØz | ``bcÐüa™D"ð0b\)Σͬàñ„|!g¼ÿùã΋çë+òó³’b#ÂȤ/ˆ‘ÁÈ’3²òK«ê›Oµ^¾ÜvýfýÙ¶ªr43kž¹p¥«ûî• ·^ö#býr“ÂþQ¹I68,7ZÌjÃÒږɼ°ÿî +Æõƒÿñÿýù÷¿ûñO0MÌL‹¥‘Vc]?üøíŠuΨÙ9´áÓb6ÎïØ!{ØMß.9lËdqóC÷lçø° »B7Ìôà®Érô-dž¸™3:€iT*%fbˆ—ÃaOOñ!Üšææ-*¡BÊçN@1~ý̹†Ò|FfZl-$(à3bD2ËÌ/­¬m>uîâÅŽÎÎí•X ަz±X¬ºæ³·ººî¶_èèyñjph³3 Ö7 6&æéøfçÍûÓyþíïþm{eÛjß]yýáýÎÆÁÛ½m›FmßÞÙÙÛß?Ø_Ý[ƒbÜyƒ³éxã3sv¡\Î:-1K*ŠrBW;=%µX%2›Žkû˜Êæ1Ñ4«™Ršµj¡Ò¢“h!2ªŒ:T%טàZÄSŒÇ€asŒ˜€I Œ1.O¡ƒ”¤1ÎÛÔ2‘@*äN ¼ì¾|º¹é±´8:LúŒXÈ’Ò€XEués.^¹qûJÛ)t·›WSSS[Ãjhn¹vçÞÕ+O{>z1‚ ±†å&†+_øÓ¿­ƒˆ€#=»ýñÛ·‹ÛöÙýu9O((LZ•’7ÁæLMÀ˜˜XšGÄÖßÍaÄ,&Šg°KÄ@ šÅj³šjžÚfÌ5×*—+ÌV©|V'˜…çlȱaUMNLN‚î²kÆ'ejµXbA"Ór†yr_"àTbÙô´œ§9æ“XSca ™„1 &PéÍ˳ófN#ª42!gbðÅ ­-5¥…i)qQ81@†ˆyzùBè¡EÇ%e€N®ªm>ÛzñÊõöÊ’’Ǥpì$[-6[°¾¡±éÚí»·o\í¼wïÅÈà{j`Úl‘Mض¶Öö׿ÁúþîûÅí5ËëÃ¥ÝýƒÃÿý—?}ÿýÛB¡” …RÍÒ |j³ýp¹M­Pë%ȱVÛâʬYnЃAd7€£YÀ”bÆlÕÉd:V§×èTVxD±Rþ ¦¥7ØÌzƒÑlÔjJ3³\j2©õ£TkÕKtf¹î b 
Äd2§‰ñ8à•|eEk_]˜³©D|1zll¨÷ÆåŽ •EyY©@Œ^éí æáå㌓”‘SXVUÛpòÜ…K7ê ‹ Šóè±9˜Õˆšá[_ßt¡ãV÷½{ׯÞ˜š·Zù¯†ÄÒQ뇕,dmçõ·«ËÛ–…×ïÿò׿þõ¿ÿqpx|ðÎ põÇCÃܵ÷K(¾¾ÆbL¡Ò¬[ÛsÆÙ7oÌ©o"ð›|Z8ÅpVÃÔ › jT æÙÌb¶pzz\‰¬MÌ5háÂD&²Ù°T)WkE©0Špà”F±Æ¤k“CØbÈÄØœ™I‘Ѫž[]Z˜·J8¼á¾ÑÁ'w:¯··°Ê >'†æAጠ%`NAiEU}ã©ÖöëµèRéQƒŸ‰Í"GÌð ˜u-m—<~ð £khpÜb õ«æ¦'d³VóÊ›7ßîï˜lëo!YþåßÙ¦Ñ –‡==/Ø 0ŃU¬ÑŠ•ÎbU³Õ›wöß­ÙÌlŽÜÓúÝíÝÝU»H03m²Yfú‡x¹@.°šÕ|‹^«š['•q5(âYDPf ìf¥Î`šÕkL³è` o†èÄJ£–+Ó´Z-šÂþ‰˜ØÌÌÔ„È ÓÚ–€˜Q!’ñØcƒO»¡°la•¤£ê’N d>þDrxDL|*#¯¨ŒÉj8Ùzá" ›~X|4]š‰-éfuÎù½ ÎßÔÛ?8¦u:>00$Zx½½66ðþÛõ×ûÎêþáãÛ÷;ó ±TªZØØØÜz½»ô'övoTJ¥¶ù¹ÿò§ý%;D0#òUÞ¨\*”T:ŽppDfbƒ<Ñ4OÆÕUÓöÅÕ­Íó¬‚-’ bB±\Ê1ZõF¥qg{÷`¬ ÷è¢ÓÌ<±~a}}}eeQ/lj °(63=9®Ô——íV•T­’ˆØãƒ½bùÉq±áT2FŒà nI ‰ˆ‰KNÏ.,«¨©k9ÝÚTœŸWPT\XRZZâœTî˜Q~ljt]ÓÉ–«ÏÆDƒvlpxhܺ=? YÞ¾»sðnqkÅzøvM=5Ñkkmok={öÜùsgO·¿F´ûfבŠo¶.~óû·;{»»©¬àyÓ:%ß`V(ù‚A¾ÑjŒ Žðy“`S †'§Æ¦„b±Ì.f ˆ¤8±Æ®ˆer ör¡HtLUh'•f©R¨ŒIƒL Ô+ø$*)1¹Þh[ZœÕšR­TÀgô>¸{í|SUQfB\,-„ˆjq/œ˜‡—·?‘—–•_T^Ūo9s¦ MÜÌG³\ÑšÑÏÖÄÔ|6¼¾í~¿Ñªyõøa÷#åÖÊêúŠÅ¼µ½ÿfþ`Ÿñ~˪ŸÛÚØX]4ôö‚*ì}òøqŸbaœÆ¸üf^kB±ì dÒþáÚê¬mvyog€é5z(z‹Å¨N !Úõ™Z%S ´ £N •óDz£Je€Ç Sm6•F#•ç´R­V)æñ¹\DbÆ¥>ˆ~ƒŽ§6ÉåÓù´¼‰WÐb8åôÄ8Gµ>ÆAqßn³Ôr Ÿ=1ÜÛq±ýüéšÂ<(’"±3¼¾˜ºÀæ'‚[‚(´¨¸äÌ,$02&3/3;¿„C[”?É‹ç1×l¨¯oi¨«o¨¯ë0³Ù¦}»yñàðÇÝ…mÛüöÖÿûïÿØ‘J•²Éá™™‘ “Ѥ°î-@h{·«‡Œf—¸âqí‡ ÈW2›Þ0ÞÏãŽpÌVåÌÌпiP V5ÌB)4jZ#äW»jFÌæLNó"‘VÈåsø $l¾F4!3ð¤(5B R­:‰T§7H'$b û …‹”…DÈÃ|ll|lt„gš_°[MJÈqöèÓ{çO5²ªK ÉPV¢3¼>@Ì_®K·# §Ç&€îÏ+©`²ú2F:©TZþŽivjøÒ—ZgDkh¨ojn>ÙÚ9Á·­nî­™@Ãîþñ`åÝÂüöÊÿëÇ>~óaoç`Ùd]}ÿöÍûm³ewˆYßí£Œ6+cOòöì\Û›5¤NgÍ& ÆŸš±ØLÙððÈÈàˆT¦°i8R‘@)5Éù\%r¥Á(“¨Fö¤DÁg‹ùF½T6. 
K¸TM TX]¤GýÙ„\¦ÄÓ¤TüÉÄØÓÓ`cc£C“j;Ó°§&'fúß¹uµõdC5Í‡Š£‘ˆ(ðãS9мt/oß Ÿ˜ž™[T\YUÓP›&V3rò°E·EÅøéqçd(< ­Ekë+ŠOßìêxûâmãÛÝ-»Åj€âñãëíóîŽ}}gݶ´=+Mò$*ÁbÓ$ë ï­‡ûP Úl’a¹BtF··a…À¬×êÆú¸¼¹VoP+GA¹°¥z­^%0j9b•^6Â1kf4få´R>:=Ý?ÀQÈER!èYåV>:$–©P9ªÖC§WLJ±µqHí#`b> .—ÍK›žšÒ/ÍÏš%BÎpÿ‹ž®Û×.¶Ÿk¬e–å$ÇF„8ˆáÓ0Eæå¥e8= 5Ér K@ù§:š2 ÑÂîÒRl!J˜@cÕ·_yÜ[ŸJª¨¢åsæ,Z½N#²ì|Ü;Ø4ÏÍÊgƤ‰bªoøÙóW£cC|…ptTfÑ K.Ì[,6õO8204808-·Ù8PßÍL Mq8SSÃ#ìglpzdldT„ºb …\(äMŽq¾@Èò&‡&yPíð9‰T«¸£\0"rµA+•© úAˆ“+©T,–J<Ða Â&ÇGFF$½F)œžQ[çl:ðð¡—½ïݽzéjë©zfQj| íåæyâë+ñyþ诂Ñ"£R /,©dæÆ¡­°Ihõ6‡¬° °JͲ£)ŠÕõ]c}}Ožô<éx:ÌáªôB•BÄ•mo¼ßX:˜Ó¤ …¥ ô¥„”LkÒ™ :“Z¡µçÞmšÁ.äž;4-Ì6¥PÀ(u*‘Z¤ÖHÙ|™2Ú(?= /¬–¡v—^+âñx2=Äz•R090)QÉ¥ìáaÎÄÀ¤X%ƒ™dŒ …"H²Rîà›?=è4”µc£ýCãc}/Fú_<ïÜÓ5übDŽˆ©à=Èà½éeÓ£ýÏŸ=êzxýʕӵ5¥¹©ñqaPTb=ØcļÑLD,>9-3· ¨´<[S“ŒmT‘™‰”6×§¤¸È)а¼YßÖÔÜ=5ÃåÉÌ(-YçMZ£N$¼^ûðzùÀŽw Ô†å9Èñ–Y°«ÙÅå;<ÅWY´ªÃ-#ü„'¶j†%f“L®â@ú“˜JƒÙ2­5ŒÃÄ_=­ãÍ(¬‰Þ¢˜,bÖTÔ Ì¡Ê¢˜‹Õj£aZd’±àyjå¤2ˆ.¥\)“A;‹øÔ à€NO˜›˜ŸæêQ[jU­‚¯ÕË'^>úèn÷ÃÛí§«K‹3ãÃÈDç’T·Ï‰…  ŠñÉY9h=HjRZB2ZîØ!#†ªÍ’£ýp׬;uæì¹öÎG£2i~Q£S³%Ó+ëÞïì`ÄŒ‰”Ñ(S¡Æ3€<2™c«N¹}$GÇí›MS#bžZÁM –š,3 ´@¨ÍLr¥<Íà°Üb”É,š“Y À F-ßbÔ­r¶J$—[MSÔúéAã«´‘Xïèj´XìBCŠéV®óô&ƒR`0£ê\o\УEã\žF-}ñôñý›×ïv^mm®.+L ðÇ6îÁOp"}áéíëD‰ Ç€3¢©=EùéÙIø>騿W7ÑZ7¨ð)x83$ÎjX¬šºÚºú¦SçŸL>8éNßÈ+û›Ã-û< wªïÙèøàˆH2Ö;Ê~ùjbld\šG¬i6¶´ð\Gµ‹\©©Õ:%Ok¨ F£Á¨`«ÕbƒbFi6êµfƒÚlÑA±"Ä(¶ |_£“äOÛt&H(ÒKUXŠÔéÔ YeøZÀ íñ`WYì6$pz„-³ó9•têÕóG÷ïÜè¸y¥í d´¬”ø8z1À'æúi—#“©Q±qII9h"zqN<¶hm=êX‘‹¶Áfy¢Þ6¥ ¯œêðr³¾¾[#ÙÄûÆv¹éTsS¶+|Õ765Ö7 [#gOÁ7N±—f®]{ØqªådÛµK§/ßè¸vµ£ãF÷Ý»»nvõö<îõäåÀ«§ýƒCCƒ¼Ù9“mqÁ¾´¸°¼¨ê‹Ÿ¿’ê¡v°Ùͬ•£7¬ó³óh¯ÐÂ)-`A:­ x§P¡=.øWi[5ˆ5g‘\S«Á'!LMr†ûž=îê¼võÆõ‹§«‹r³“£BÉ~èä¤Jçê]Olý‰Nd à‰ì‹rãÑ®šñqñhwÙláÚ¡(/í-€úج~¬ `}¾—Åê]VUû?Vm÷ÓÓ,gY]Õá÷ëê¼ãÒ€±qaîÀºñ*Ïï>¬žô­kLl‘èvæm{ï— ŽÞ×ìî»ûû;;¯×7· :[Ÿ›…ˆg²®,Ú´P1¨Åj¥\ÙÜÖÊ¢¢ÿ¦Cg-ÑÖ3hÿ†žî»·®]k;×R]’ÏH‹¢G  È|¼°?©äæ~´ÞJhÅH6×:Â6#/ÛÐ<[DöMÁs#×9ÏYn:k€cÔXm]M¬Úh·ºtU‡vkjh8i<°|x·eÙ~ÿQ•yo|Bµr¸±c3ï¼_wœ$ÒÍ"b‡û‡ûû{{{{;»ï¶·í+ï>~÷nsamimñfà«g··w—‘Y‚ôÓC…VÉGëêûžõô<½{ÿÞÍKõu5EYi©ÉtZ9 Лm|´{Ú„à02*ŸÏŸœ†6rŠ Gë£ñ•ôɉIøÎ99h-*ZˆêØR«X¨­}„­®þyYÖÕÿ3‡¯×‚«7u­¯Ùö>¼Û]8øðaMüá»Þ¿=|kÕ¯}سáBUgœ{}°wx¸·¿·»·¿»½µùáwÛvõÜö»·Þ®/ì¼¶Kf“N`€r 
òö²Y‡ÖŒË•&묉ÃM¼zùôáã‡ܺr²ª¤(7=)>:",4ëZ|þ—Y\ÑŸq@Ëwƒ@Æ$,z¥¥ÆÑÑæOh/šãÔ¾[CNš%‹o's„¬eÍ#b,çÍσcÕóÏ_w ;³³Ëoß¼ùøöÍ›ƒ÷s‹oög ¯ßïÏê°ZH§3¬o-nîîííî¾ÞÚZÝ?Ü\]3èìóöÕ½ý%‹Åbéáç°M :ó¬VmÔ)¥r³U/gOŽF{Òu¿ëî­‹WÎÖ”dg$ÆÒ#ÃC‚°?>èù1ì¯w€Â P©€,6¥$ÑÑ®Ýø– Ø:Ëä$Lld¡]žò1×tÎ)v®¸ÁÖ‰¬[‹ÝÖ ÙÙU5ÕÕ,|Sc¸­ALYµŸ6»¨û9zu}‡»«¦­Ã·ÈóÞ¼y÷î`ݦž=8˜Óéñ³j:“Ÿ¸³ >·µ¹¹¹²¼¼²¸0‰Àb\Þ´ÁOh¡òÒ©4j(,&½Ri\˜3«4*1{f|tàeoO׃Ç÷nž?}ŠUZ˜‘N&¢?¥wô×Úý å8 m†ö"‹¤Ó(2% ÛYý´Z)15%%íHŠÌ ­»qÎó¯ü´J ¶IJµc³”ªŸGߪA[\8ÆgÄ>ñcïØ·6-›«»81oßž7/½Y Švyavpm¬­­¢}ŽÐÖP6 ¦ºtj• ÊÈœ 4qD¯–IÔFû¬^ÎgÏŒ ô=ïyòðA÷ík'ë˜%E9™Iq1‘áa`cþ_!æh`ø ÝŒ°}² |…áÛØ‘ÑŸ³Š‰Œˆ¶‘ ª7³Ð<.Ô*sÎ^¯<¾k ÚûƒþxJ¶þ±QU…m5LǦPLç p[s|€}:ˆÕ\4­é­ë³k+ëû‡ßßl›æ÷÷·Pÿ{ñpÀ¶ìQk@y)MùÂÖÆÚúÞ:ϸ¬ Ú@dh5J…B64‰s“JD"B)RÈ5ZþÌ$›;†•FÏžÞïho«c–2Ò’âètZ0 ŠpDÌ1)ÌÍý1Ǫg‰BE›EP¡^Çw ¥DÆCH‹Œr.êM9†¬¸´âhë¦JçNO•ÕÀ©Ûi§²ò+;ŠUb?íü?h;-\×}¹íL5¶· Ï—æ,[pËý½½ý…eðùpoqéõ¢Íöúõ¬B©”)Ð95Úŵ•å÷KsöY —›5€P ‡§ Ô  SJ­ÕH'ÇÆùü™ÁžžÞG»®´¶7W–åg¥'ÅEEÐBIDôW.ÿÿö®û­­kÙ$It0ÍQD‘D•@4cŠé½™.cºEïÕ˜bcpãÇ%Ž}“¼w“û½Þ_ç73ûdŠA‰ëûØóÒ‘>Xš½÷š9g¯%±Îbb[Ä„$SA’y£|v8f¸%ŽI¡-2’ö-%jid¦ÃÈ4ío!Ì:¦Ó… Ö–sü®¸óÛ³_Þ½ùý˜Õÿç-)£ ïþ÷·ÏûåÙë?~Ï®;ÁëžþüÛ_>üüë¯?ãÕj?ƒ@¨]DikcscýÁíÝ- ²@ò}·½07¿¾±::0<>ØÓR^YsÑsX\TH ¿01¥›T¶gn)ÛŽJÚj#gf3hïwÞWÆfIŸ£® ‰“Bˆ>‰émf˜>MÖQA‚G6Y'ì£bÙ‡’-·haxà§wþùû¿þõ¯·…ª¾ïÿøí‡þñæý»ßþù–ŒPí>~óÓO?½p½¦Þàõ/zÚ½µ¹±uøêÚ=À÷>¾³ºº¹¾¶2=5½¸258:4Üo®.+/Ê4¦Æk4á~è4¥p•J÷íì†}bdÉÐþÉÇcêÌrÌÇ›äσpo$+Ðaþ׳,Cj¶×o´ÖéV¨„™.333ó Ú_–Uî/ë’UGQb f›²³=¼›•U8~ÿå/Ïþã·Ç­¯úïW;wŸ¾z¾ûê¿^ß½sRh{{û;`þ¯ß¼yõîÃÛ·xB  ÝÛÛ»÷žÜ^^^^ÄøÝû·ïÞÛݽ»{k{sfzblfiq~fÜÒµóJmU‰)Y¯‹ÓDúz{ªTaR+ƒ!iƒØ~’¹)#Wo?%:÷X%CPÛ.厂Qž:2š”âi`BÕN-Z¦Ä&hEPp“‰j›$µ‰:¥FÛ@±;c&…ñbm¶Ñ”™Wž{„2® þÌŒúù§]#¯~y·›Þüã³O¶¶ñ¼î³÷¯¶±›³¹¾ñ.zùòù¯¿øð /GÁ {ãÖ£Ýûwñ’¤™™™åÕ¹ÕKK·n­Ì¢¦óÂüÌä`ÿ5s{suia^JB¬Zìëé©Ä!É‹Äb¼Šú0b¤ ¥dòžhI*8i1½I? 
FAš¨h’?‡šI¯Çýö†è&, ¤ŒKО‚Œ›ÁhüÊ^fLÏmj2¥›2ò;KLŸ›üà(j©¬­Ÿ{ÿ¼mtëλïã©û/¿E–7¶ŸÓµb/^¼†B’Nã>}¸…­ü›Ë+³SS“SSSãó³£#£ K‹c $×äõÁî®îŽ–ÆÚ²üô”ÄØÈȈ`oL0VKpg«Ám|x­ˆÉHGK¥r“+=Un6Ö(Cæíí͆I¨@£abñI©¬Ù˜Fÿ½ÁªO-„ªv”$í`”);ˆ]yKKAYMsÍ%S:)Û_ãÞIfÄ̘7?Q3úöýóéM Zw7nn=~rçæÊ›këkË·ž^x‚Rn%®â#<ž„©ŸI™ª<Ü]h+‰Õ–¦3¶ø²,#÷s¨=©”¨'MÓ$cnªU6žPÂMùÉ)¨›› Ë*–£z¸£O$ôd½åÐIUW¯¯¨¬l¯o*ÌD½ùTTœOCÍJü&fv75­ÂlY]Y}öòÇ—OVv67?}´3?¿}këÎâÌÌÔÄÌæ]ä¦;Û;ß¿xþä ^X·OLŽŽÏÍ ö^ëé¾j6_GMzÈ/˨¥¿½­¥®¶¹±¶¼(?7+M—§ñ÷óf¾®.¶–ìG¹!I˜–¼°‘»ÈÜ\m­Yê¡ « ‚‰ÌÿgRwÀ4 .×@>%£êÓª8ôZ­è>¬Óiãuz]RIs¹±¦µ89YP>HeZé‚2j IM+ïyþðÁÓOŸ=ypyýþƒ»·gg7æ&nn.OŽZæoÝÆ³¶[ín­­¯ol®¯ÌL^ì™èï„èhëh¾><480ЭÇÜÑZ_SSQZ˜›eLÑÆF„…øùúxà©#Ù>«‰Ï—d2¼ºÀM¥”¹0éži Ó ¤EÓŸjs˜þÕQ1Ö“t ÚCE¾ NÊ2ïÓj­¾4äƒGJÎqVY›„x- WjáÅIÙIºÊ–^@yb=æ¤5• “ô¬)—šZÒ;øè»í¾{¸¹:¿´±µs{búÆÚüìÖ’eøúuËÄÌõµµµÝû[Ë ‹³³SÓã#Ã0¯u™‡ºÐͦ©©±¹»çZooOOÏUsWkS]uUYQ>TFI‰±áAç!Ã<îrW ³ù“8رZüÚ£$sÅ=Ñ‚)ì§ÍJÁRĽ%±hBQ²¨h:MŸbÒ“%JÖ왫 *8:EF“켚iÏÐôhtª!N“Qœ‚kˆF“h¬®¸”¬…ké Ëø ímôšN—78vçÖöÊâÝ{;ë7—Ç'Wn¯ ϬÎ,,.Íö÷ô÷õ^뛘[â…kfjvfÌ¥O_¯¹³¯««§¹¡±¡®®¦¶±­»££³½ñ««ª,-ºx%k5‘Áè+¢Rºïå×gLê™¶©3SI—9 àŸú€Sšy’Y” Æœi¢£Pc^Oúû‚º5Lqq(7¯‰€P«À Òòâ"#uÙš˜ì\=ó÷ÉÈ(+ÐÇÇ=€™§±ocûÆÒÊ^öuke`dõÆXÿÄâ€eq~v £³Ólîîê™^§‹è–Væf'®™»»;ÚšÑ$®¡²¢¼²±ª¦ñJKSKc}}}饂Ëùa3¦kãb€æûùö«ÌEBš¢ÏïøtdšZ¨¬¶gžnc½%ÈÍ“V³7¹·‘¯ˆUB*2:–Rg/ Ÿ¢b ³Ôê[?­=ÇÔwŽD¼£M—â#£cÔ±yÅÅ—“¢PÓ?Žnâ5(©˜kùXM楺9¨t&ÆæÖ¦—nÌôöO/Œ NöNONŒ^mEw)óÐèä ŠÅyt™1wÂÃÀµªp¶*.*©.®¬ª«©«*)(,É3e²²³L†æBpP€¹:[7«¤·DIæL[z%ûbógfï¶o7¯d žÈÍÈÕ=ÉÂÉð. 
ý¢˜Õy2ƒÀ°°0[ïSö«àÖ‹®6-/  ØÆN‹"4%/®Ø5ö6£uy9‰Wç†,KË‹## –¡•é®.³er¸¿c¨¯wøúÐpOCMM]ûÀ`ÿnͰôY&-C}­Mä”h’)tQQYQYyÅŬÔÿOMu8%)!†*oà­n@^§ØSì(“b§³Ó>bÌ”EÊ4›­¡Ý*:"£­4¹ü¾dä1‚(E¢ËVí*2kTx.4$è‚ IÉÍÉ»˜Ž@3sA­‰Ž ô÷VÁÐqwC–/q;س÷ÜQ ‰æ‚µºøh´vÊH"\îæ®l®½}}|P"Üÿ¼ÕÞQ &Ÿk|È?Àß×±†··7þ„‡¼Ñ/{Høè§š(àÇ쪂à­cÍ­H]ݰøuw´vvµ·µ¶4_ij(¯(ËÎ+­(‡d*+/--//¯ªª¨jh†é¾¾¶ªÅŽ3 ÊŠöñ…-à|ðGµ›K˜6hbDø…° X!=•˜`2L‘š¤Ü*ÁW9faƒá‰sš«TÅ&‰ª{‹™·Ÿ@‚¡-bÁà!gFO4G'xÁyž¬]:|Ú×¹F¡ ðóg–âˆ7ùbC›Jk›€´47_in±® ¶¥¾fôªª²‚‚¬ô´ì¢½=É,Q‚1L¾±ˆ²J€šàd©Ï“ý, ï"Xa’{¨Ÿ>?ÇTt¹¸ôrUseÉåÒB“ábY!³ ÿb^nvv&i2aWÈ€­lÚšrèÒÓC²jàÖ±nNJöÇ(¼B½PvÂQŠ}S?LrȬÄ_NP‚T ÷M[ÐþWO)UOˆ- È,Ì*\SÑæžœ¼îndÓ«R*¬øYo•øì}¼J!a¸Geåd Â%žÝÌMN5ågë“Q¡ßº!I§Ã^Ìë:]ŠÑd4f2 &£!]ŸKÜP.sZ¤t÷ðöô9¯ ¡˜›ež€5œÅö‹b¼::±5ÁÙêÝâŠå€›×šÖ”*tÐß*œéÐPmìY=/C_BøìàA7w2¥Rð=V²<„•Þƒ½}Á“îî  6é¤An–!Õ˜aÔÆÅêSuZ}¢.1{uhA«‰×BÅž¬£º=5%µ¦÷ á.N %|h4ìeç¬Ð8Ñ„”ÂIìèðÕ”\pQìOkÀ’ 74Ý3ÆV¢3¶ÓÉ%ú¨Õ&“ʘy .ôÞäB(ròÓf¯¤eE)X41c+7Ïè¸Ø¤t"#'˘’š”œO~½qš(5”cê˜h5vt)I‰º¬¯â€ÿ†‡átäå"‘£Ñ5RoO—½åÐÉ c%â¯*ð‚¨‰Ä6N d G6.4ôänr…-A ì1ĤÈM°LZ"2 *òpTQÆõÞÂÝDrz7ôbÅ{r™\¥ Ž×éµÚÔ½Vª¦ÂU°¨±Jˆ ![l,Ï 'V šìÃTR©k=`“îûù$¾´³““øïPÄÁÁ†u0@9à#ƒÌÂõœ3§úéâÂJ:MÃ,¿°)‡)ÇZÀ$‰Ù‡–-%°]=¢Ôš8Ô%‰ cþ˜aȘC ÄMb¶0ñCšx>À¯q•H݉ˤ 7›¬ ö·iÃj`³„ !ø7­ÿ#ar €ë­³óžm4;­ád¶¦‰)¥æ¹+Û\$n>çqZB¤BPMšØ. ;FÜ·‡„¡ço ðAFSÞxxºˆ Ç\äîR±‹»Ô¶ýpý½ŠKh{&Ú+¥˜5(~±&û:.Ø $VCéÞEé^¸°·•H• ß`¼¦@‚*J. 
€TEPeAÀA½èï„ÕÛ —`˜aîpr‘I¤bÛ?ßáïW¨,•U{ªHÔkn…;ÇaK0 `Ö¢:(³¿;+<±&ÅoÉ«-d}Ä“‰Öxúç…rן¸¼J‰K „3”ËR'‰Ô–­·Š×~I*>›˜{³¥ÂÁºÐ¾ÂÁb§Ó‡³“XªdÞsX&VäƒË+‘âtHè€éyùÀÈåRXÇ¥R'G±3z³I¾Ĭ¹&² G²ª.ÛÃèA´q31;rÌM©R²Ê{'^ܰ«Œ#!A> ˉm¹J©íH‘ƒHô­ öM­à®RGâ´6Õ°dpÄNlPK8bv!&vpà3—=ˆ‰y‚Ù"¾2òàÁƒrÄìCŒWGG!ö¹ âˆûhƒÓ)ÊFNAl3ë4èpÄ8‹àˆqľå²ò B1ŽØ—y¶·'÷8ƒåˆýUÄx%þEã•øgGåÑu•±¿Þ»8~ŒrÆÁûÒ#•GìkTI'!v|G#vìkl~ðѸãgaæ€}šU'ñy~ÑÝóŒ#ö•ã£ò¨yì3ˆð™ÿèµòXD8»8Ž™žÔƒåˆqD8b±ÿTÄøÅÆ±/ÝË8QŽGìkZŽGìKábö Oh1;²ËöÜ÷!¶Á‰…½ˆñ†¢ÝˆñàˆqľEÄ8~G•J~?‚}œí^ÅaÈÎI'¸¦Š½ÿùYGÌþn×Gì#Gìϰ“AåˆÙ‹ØGŽØ±³þI;Äù{ãjZ§[9ùÞ{—NŽØÉ#õpÉãˆ}„ÿütœò3â§GìGLköWIg{DrÄþ:›ý<~±ÃóÙIWs¬Žf÷']lÍãô½ Ž˜½ˆñàˆqľÍyŒ#zî:vÂóçx×úôW;ñ>¿½œá¬#ÆÏWrľÍJœ#ö b§¨Ä9bG÷0>ónÜÄmaffxparser/inst/testscripts/0000755000175200017520000000000014516003651017302 5ustar00biocbuildbiocbuildaffxparser/inst/testscripts/manualTest.R0000644000175200017520000000047114516003651021544 0ustar00biocbuildbiocbuildcelnames <- c("Test3-Tissue1-121502.cel", "Test3-Tissue1-121502.CEL", "Test3-Tissue1-121502.calvin.CEL") cdfnames <- c("Test3.CDF", "Test3.expression.CDF") if(!all(file.exists(c(celnames, cdfnames)))) stop("Missing files...") headers <- lapply(as.list(celnames), read.cel.header) affxparser/inst/testscripts/speedtest.R0000644000175200017520000000074314516003651021431 0ustar00biocbuildbiocbuildlibrary(affxparser) library(affy) sessionInfo() place <- "/Users/kdh/work/bioc/data/hgu133" files <- c("HG-U133A-1-121502.CEL", "HG-U133A-2-121502.CEL", "HG-U133A-Tissue1-121502.CEL", "HG-U133A-Tissue2-121502.CEL") filesWplace <- paste(place, files, sep = "/") system.time( tmp.affy <- read.affybatch(filenames = filesWplace) ) system.time( tmp.affxparser <- read.affybatch2(filenames = filesWplace) ) affxparser/inst/testscripts/writeAndReadEmptyCdf.R0000644000175200017520000000100614516003651023427 0ustar00biocbuildbiocbuildlibrary("affxparser"); # Create a CDF structure for a chip type with 2-by-3 cells hdr <- list( chiptype="Empty2x3", nrows=2, ncols=3, nunits=0, nqcunits=0, refseq="" ); units <- qcUnits <- list(); # Write CDF file pathname 
<- file.path(tempdir(), "Empty2x3.cdf"); str(pathname); writeCdf(pathname, cdfheader=hdr, cdf=units, cdfqc=qcUnits, overwrite=TRUE); # Read CDF header hdr2 <- readCdfHeader(pathname); str(hdr2); # Read CDF units units2 <- readCdfUnits(pathname); str(units2); affxparser/inst/testscripts/writeAndReadEmptyCel.R0000644000175200017520000000254314516003651023445 0ustar00biocbuildbiocbuildlibrary("affxparser"); # Create a CEL header for a chip type with 2-by-3 cells hdr <- list( version=4, chiptype="Empty2x3", rows=2, cols=3, algorithm="Percentile\nAlgorithm", parameters="Percentile:75;CellMargin:2;OutlierHigh:1.500;OutlierLow:1.004;AlgVersion:6.0;FixedCellSize:TRUE;FullFeatureWidth:5;FullFeatureHeight:5;IgnoreOutliersInShiftRows:FALSE;FeatureExtraction:TRUE;PoolWidthExtenstion:2;PoolHeightExtension:2;UseSubgrids:FALSE;RandomizePixels:FALSE;ErrorBasis:StdvMean;StdMult:1.000000", cellmargin=2, noutliers=0, nmasked=0 ); hdr$header <- sprintf("Cols=%d\nRows=%d\nTotalX=%d\nTotalY=%d\nOffsetX=0\nOffsetY=0\nGridCornerUL=223 141\nGridCornerUR=3590 166\nGridCornerLR=3565 3534\nGridCornerLL=198 3508\nAxis-invertX=0\nAxisInvertY=0\nswapXY=0\nDatHeader=[13..25244] 050523MJA_SNP10K-2.0_KJ05:CLS=3666 RWS=3666 XIN=1 YIN=1 VE=30 2.0 09/26/12 10:01:02 50102200 M10 \024 \024 %s.1sq \024 \024 \024 \024 \024 \024 \024 \024 \024 6\nAlgorithm=%sParameters=%s\n", hdr$cols, hdr$rows, hdr$cols, hdr$rows, hdr$chiptype, hdr$algorithm, hdr$parameters); # Write CDF file pathname <- file.path(tempdir(), "Empty2x3.cel"); str(pathname); createCel(pathname, header=hdr, overwrite=TRUE); # Read CEL header hdr2 <- readCelHeader(pathname); str(hdr2); # Read CEL data data2 <- readCel(pathname); str(data2); affxparser/man/0000755000175200017520000000000014516003651014511 5ustar00biocbuildbiocbuildaffxparser/man/1._Dictionary.Rd0000644000175200017520000000403014516003651017400 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it 
was automatically generated from: % % 901.Dictionary.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{1. Dictionary} \alias{1. Dictionary} \title{1. Dictionary} \description{ This part describes non-obvious terms used in this package. \describe{ \item{affxparser}{The name of this package.} \item{API}{Application program interface, which describes the functional interface of underlying methods.} \item{block}{(aka group).} \item{BPMAP}{A file format containing information related to the design of the tiling arrays.} \item{Calvin}{A special binary file format.} \item{CDF}{A file format: chip definition file.} \item{CEL}{A file format: cell intensity file.} \item{cell}{(aka feature) A probe.} \item{cell index}{An integer that identifies a probe uniquely.} \item{chip}{An array.} \item{chip type}{An identifier specifying a chip design uniquely, e.g. \code{"Mapping50K_Xba240"}.} \item{DAT}{A file format: contains pixel intensity values collected from an Affymetrix GeneArray scanner.} \item{feature}{A probe.} \item{Fusion SDK}{Open-source software development kit (SDK) provided by Affymetrix to access their data files.} \item{group}{(aka block) Defines a unique subset of the cells in a unit. Expression arrays typically only have one group per unit, whereas SNP arrays have either two or four groups per unit, one for each of the two allele times possibly repeated for both strands.} \item{MM}{Mismatch-match, e.g. MM probe.} \item{PGF}{A file format: probe group file.} \item{TPMAP}{A file format storing the relationship between (PM,MM) pairs (or PM probes) and positions on a set of sequences.} \item{QC}{Quality control, e.g. 
QC probes and QC probe sets.} \item{unit}{A probeset.} \item{XDA}{A file format, aka the binary file format.} } } \keyword{documentation} affxparser/man/2._Cell_coordinates_and_cell_indices.Rd0000644000175200017520000000761614516003651024101 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % 902.CellCoordinatesAndIndices.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{2. Cell coordinates and cell indices} \alias{2. Cell coordinates and cell indices} \title{2. Cell coordinates and cell indices} \description{ This part describes how Affymetrix \emph{cells}, also known as \emph{probes} or \emph{features}, are addressed. } \section{Cell coordinates}{ In Affymetrix data files, cells are uniquely identified by their \emph{cell coordinates}, i.e. \eqn{(x,y)}. For an array with \eqn{N*K} cells in \eqn{N} rows and \eqn{K} columns, the \eqn{x} coordinate is an integer in \eqn{[0,K-1]}, and the \eqn{y} coordinate is an integer in \eqn{[0,N-1]}. The cell in the upper-left corner has coordinate \eqn{(x,y)=(0,0)} and the one in the lower-right corner \eqn{(x,y)=(K-1,N-1)}. } \section{Cell indices and cell-index offsets}{ To simplify addressing of cells, a coordinate-to-index function is used so that each cell can be addressed using a single integer instead (of two). Affymetrix defines the \emph{cell index}, \eqn{i}, of cell \eqn{(x,y)} as \deqn{ i = K*y + x + 1, } where one is added to give indices in \eqn{[1,N*K]}. Continuing, the above definition means that cells are ordered row by row, that is from left to right and from top to bottom, starting at the upper-left corner. For example, with a chip layout \eqn{(N,K)=(1600,1600)} the cell at \eqn{(x,y)=(0,0)} has index \eqn{i=1}, and the cell at \eqn{(x,y)=(1599,1599)} has index \eqn{i=2560000}.
A cell at \eqn{(x,y)=(1498,3)} has index \eqn{i=6299}. Given the cell index \eqn{i}, the coordinate \eqn{(x,y)} can be calculated as \deqn{ y = floor((i-1)/K) } \deqn{ x = (i-1)-K*y. } Continuing the above example, the coordinate for cell \eqn{i=1} is found to be \eqn{(x,y)=(0,0)}, for cell \eqn{i=2560000} it is \eqn{(x,y)=(1599,1599)}, for cell \eqn{i=6299} it is \eqn{(x,y)=(1498,3)}. } \section{Converting between cell indices and (x,y) coordinates in R}{ Although not needed to use the methods in this package, to get the cell indices for the cell coordinates or vice versa, see \code{\link[affy:xy2indices]{xy2indices}()} and \code{indices2xy()} in the \bold{affy} package. } \section{Note on the zero-based "index" field of Affymetrix CDF files}{ An Affymetrix CDF file provides information on which cells should be grouped together. To identify these groups of cells, the cells are specified by their (x,y) coordinates, which are stored as zero-based coordinates in the CDF file. All methods of the \pkg{affxparser} package make use of these (x,y) coordinates, and some methods make it possible to read them as well. However, it is much more common that the methods return cell indices \emph{calculated} from the (x,y) coordinates as explained above. In order to conveniently work with cell indices in \R, the convention in \emph{affxparser} is to use \emph{one-based} indices. Hence the addition (and subtraction) of 1:s in the above equations. This is all taken care of by \pkg{affxparser}. Note that, in addition to (x,y) coordinates, a CDF file also contains a zero-based "index" for each cell. This "index" is redundant to the (x,y) coordinate and can be calculated analogously to the above \emph{cell index} while leaving out the addition (subtraction) of 1:s. Importantly, since this "index" is redundant (and exists only in CDF files), we have decided to treat this field as an internal field.
Methods of \pkg{affxparser} do neither provide access to nor make use of this internal field. } \author{Henrik Bengtsson} \keyword{documentation} affxparser/man/9._Advanced_-_Cell-index_maps_for_reading_and_writing.Rd0000644000175200017520000001522114516003651027240 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % 909.CellIndexMaps.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{9. Advanced - Cell-index maps for reading and writing} \alias{9. Advanced - Cell-index maps for reading and writing} \title{9. Advanced - Cell-index maps for reading and writing} \description{ This part defines read and write maps that can be used to remap cell indices before reading and writing data from and to file, respectively. This package provides methods to create read and write (cell-index) maps from Affymetrix CDF files. These can be used to store the cell data in an optimal order so that when data is read it is read in contiguous blocks, which is faster. In addition to this, read maps may also be used to read CEL files that have been "reshuffled" by other software. For instance, the dChip software (\url{http://www.dchip.org/}) rotates Affymetrix Exon, Tiling and Mapping 500K data. See example below how to read such data "unrotated". For more details how cell indices are defined, see \code{\link{2. Cell coordinates and cell indices}}. } \section{Motivation}{ When reading data from file, it is faster to read the data in the order that it is stored compared with, say, in a random order. The main reason for this is that the read arm of the hard drive has to move more if data is not read consecutively. Same applies when writing data to file. The read and write cache of the file system may compensate a bit for this, but not completely. 
In Affymetrix CEL files, cell data is stored in order of cell indices. Moreover, (except for a few early chip types) Affymetrix randomizes the locations of the cells such that cells in the same unit (probeset) are scattered across the array. Thus, when reading CEL data arranged by units using for instance \code{\link{readCelUnits}}(), the order of the cells requested is both random and scattered. Since CEL data is often queried unit by unit (except for some probe-level normalization methods), one can improve the speed of reading data by saving data such that cells in the same unit are stored together. A \emph{write map} is used to remap cell indices to file indices. When later reading that data back, a \emph{read map} is used to remap file indices to cell indices. Read and write maps are described next. } \section{Definition of read and write maps}{ Consider cell indices \eqn{i=1, 2, ..., N*K} and file indices \eqn{j=1, 2, ..., N*K}. A \emph{read map} is then a \emph{bijective} (one-to-one) function \eqn{h()} such that \deqn{ i = h(j), } and the corresponding \emph{write map} is the inverse function \eqn{h^{-1}()} such that \deqn{ j = h^{-1}(i). } Since the mapping is required to be bijective, it holds that \eqn{i = h(h^{-1}(i))} and that \eqn{j = h^{-1}(h(j))}. For example, consider the "reversing" read map function \eqn{h(j)=N*K-j+1}. The write map function is \eqn{h^{-1}(i)=N*K-i+1}. To verify the bijective property of this map, we see that \eqn{h(h^{-1}(i)) = h(N*K-i+1) = N*K-(N*K-i+1)+1 = i} as well as \eqn{h^{-1}(h(j)) = h^{-1}(N*K-j+1) = N*K-(N*K-j+1)+1 = j}. } \section{Read and write maps in R}{ In this package, read and write maps are represented as \code{\link[base]{integer}} \code{\link[base]{vector}}s of length \eqn{N*K} with \emph{unique} elements in \eqn{\{1,2,...,N*K\}}. Consider cell and file indices as in previous section. 
For example, the "reversing" read map in the previous section can be represented as \preformatted{ readMap <- (N*K):1 } Given a \code{\link[base]{vector}} \code{j} of file indices, the cell indices are then obtained as \code{i = readMap[j]}. The corresponding write map is \preformatted{ writeMap <- (N*K):1 } and given a \code{\link[base]{vector}} \code{i} of cell indices, the file indices are then obtained as \code{j = writeMap[i]}. Note also that the bijective property holds for this mapping, that is \code{i == readMap[writeMap[i]]} and \code{i == writeMap[readMap[i]]} are both \code{\link[base:logical]{TRUE}}. Because the mapping is bijective, the write map can be calculated from the read map by: \preformatted{ writeMap <- order(readMap) } and vice versa: \preformatted{ readMap <- order(writeMap) } Note, the \code{\link{invertMap}}() method is much faster than \code{order()}. Since most algorithms for Affymetrix data are based on probeset (unit) models, it is natural to read data unit by unit. Thus, to optimize the speed, cells should be stored in contiguous blocks of units. The method \code{\link{readCdfUnitsWriteMap}}() can be used to generate a \emph{write map} from a CDF file such that if the units are read in order, \code{\link{readCelUnits}}() will read the cell data in order. Example: \preformatted{ # Find any CDF file cdfFile <- findCdf() # Get the order of cell indices indices <- readCdfCellIndices(cdfFile) indices <- unlist(indices, use.names=FALSE) # Get an optimal write map for the CDF file writeMap <- readCdfUnitsWriteMap(cdfFile) # Get the read map readMap <- invertMap(writeMap) # Validate correctness indices2 <- readMap[indices] # == 1, 2, 3, ..., N*K } \emph{Warning}, do not misunderstand this example. It cannot be used to improve the reading speed of default CEL files. For this, the data in the CEL files has to be rearranged (by the corresponding write map).
} \section{Reading rotated CEL files}{ It might be that a CEL file was rotated by another software, e.g. the dChip software rotates Affymetrix Exon, Tiling and Mapping 500K arrays 90 degrees clockwise, which remains rotated when exported as CEL files. To read such data in a non-rotated way, a read map can be used to "unrotate" the data. The 90-degree clockwise rotation that dChip effectively uses to store such data is explained by: \preformatted{ h <- readCdfHeader(cdfFile) # (x,y) chip layout rotated 90 degrees clockwise nrow <- h$cols ncol <- h$rows y <- (nrow-1):0 x <- rep(1:ncol, each=nrow) writeMap <- as.vector(y*ncol + x) } Thus, to read this data "unrotated", use the following read map: \preformatted{ readMap <- invertMap(writeMap) data <- readCel(celFile, indices=1:10, readMap=readMap) } } \author{Henrik Bengtsson} \keyword{documentation} \keyword{internal} affxparser/man/affxparser-package.Rd0000644000175200017520000001505014516003651020533 0ustar00biocbuildbiocbuild\name{affxparser-package} \alias{affxparser-package} \alias{affxparser} \docType{package} \title{Package affxparser} \description{ The \pkg{affxparser} package provides methods for fast and memory efficient parsing of Affymetrix files [1] using the Affymetrix' Fusion SDK [2,3]. Both traditional ASCII- and binary (XDA)-based files are supported, as well as Affymetrix future binary format "Calvin". The efficiency of the parsing is dependent on whether a specific file is binary or ASCII. Currently, there are methods for reading chip definition file (CDF) and a cell intensity file (CEL). These files can be read either in full or in part. For example, probe signals from a few probesets can be extracted very quickly from a set of CEL files into a convenient list structure. } \section{To get started}{ To get started, see: \enumerate{ \item \code{\link{readCelUnits}}() - reads one or several Affymetrix CEL file probeset by probeset. \item \code{\link{readCel}}() - reads an Affymetrix CEL file. 
by probe. \item \code{\link{readCdf}}() - reads an Affymetrix CDF file by probe. \item \code{\link{readCdfUnits}}() - reads an Affymetrix CDF file unit by unit. \item \code{\link{readCdfCellIndices}}() - Like \code{readCdfUnits()}, but returns cell indices only, which is often enough to read CEL files unit by unit. \item \code{\link{applyCdfGroups}}() - Re-arranges a CDF structure. \item \code{\link{findCdf}}() - Locates an Affymetrix CDF file by chip type. This page also describes how to set up the default search path for CDF files. } } \section{Setting up the CDF search path}{ Some of the functions in this package search for CDF files automatically by scanning certain directories. To add directories to the default search path, see instructions in \code{\link{findCdf}}(). } \section{Future Work}{ Other Affymetrix files can be parsed using the Fusion SDK. Given sufficient interest we will implement this, e.g. DAT files (image files). } \section{Running examples}{ In order to run the examples, data files must exist in the current directory. Otherwise, the example scripts will do nothing. Most of the examples require a CDF file or a CEL file, or both. Make sure the CDF file is of the same chip type as the CEL file. Affymetrix provides data sets of different types at \url{http://www.affymetrix.com/support/datasets.affx} that can be used. There are both small and very large data sets available. } \section{Technical details}{ This package implements an interface to the Fusion SDK from Affymetrix.com. This SDK (software development kit) is an open source library used for parsing the various file formats used by the Affymetrix platform. The intention is to provide interfaces to most if not all file formats which may be parsed using Fusion. The SDK supports parsing of all the different versions of a specific file format. This means that ASCII, binary as well as the new binary format (codename Calvin) used by Affymetrix are supported through a single API.
We also expect any future changes to the file formats to be reflected in the SDK, and subsequently in this package. However, as the current Fusion SDK does not support compressed files, neither does \pkg{affxparser}. This is in contrast to some of the existing code in \bold{affy} and relatives (see below for links). In general we aim to provide functions returning all information in the respective files. Currently it seems that future Affymetrix chip designs may consist of so many features that returning all information will lead to an unnecessary overhead in the case a user only wants access to a subset. We have tried to make this possible. For older files, certain entries in the files have been removed from newer specifications, and the SDK does not provide utilities for reading these entries. This includes for instance the FEAT column of CDF files. Currently the package as well as the Fusion SDK is in beta stage. Bugs may be related to either codebase. We are very interested in hearing from users who are unable to compile/parse files using this library - this includes users with custom chip designs. In addition, since we aim to return all information stored in the file (and accessible using the Fusion SDK) we would like reports from users who are unable to do that. The efficiency of the underlying code may vary with the version of the file being parsed. For example, we currently report the number of outliers present in a CEL file when reading the header of the file using \code{readCelHeader}. In order to obtain this information from text based CEL files (version 2), the entire file needs to be read into memory. With version 3 of the file format, this information is stored in the header. With the introduction of the Fusion SDK (and the next version of their file formats) Affymetrix has made it possible to use multibyte character sets.
This implies that character information may be inaccessible if the compiler used to compile the C++ code does not support multibyte character sets (specifically we require that the R installation has defined the macro \code{SUPPORT_MBCS} in the \code{Rconfig.h} header file). For example GCC needs to be version 3.4 or greater on Solaris. In the \code{info} subdirectory of the package installation, information regarding changes to the Fusion SDK is stored, e.g. \preformatted{ pathname <- system.file("info", "changes2fusion.txt", package="affxparser") file.show(pathname) } } \author{ Henrik Bengtsson [aut], James Bullard [aut], Robert Gentleman [ctb], Kasper Daniel Hansen [aut, cre], Martin Morgan [ctb] } \section{Acknowledgments}{ We would like to thank Ken Simpson (WEHI, Melbourne) and Seth Falcon (FHCRC, Seattle) for feedback and code contributions. } \section{License}{ The releases of this package are licensed under LGPL version 2.1 or newer. This applies also to the Fusion SDK. } \references{ [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, April, 2006. \url{http://www.affymetrix.com/support/developer/}\cr [2] Affymetrix Inc, Fusion Software Developers Kit (SDK), 2006. \url{http://www.affymetrix.com/support/developer/fusion/}\cr [3] Henrik Bengtsson, unofficial archive of Affymetrix Fusion Software Developers Kit (SDK), \url{https://github.com/HenrikBengtsson/Affx-Fusion-SDK}\cr } \keyword{package} affxparser/man/applyCdfGroupFields.Rd0000644000175200017520000000202414516003651020704 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % applyCdfGroupFields.R % % by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{applyCdfGroupFields} \alias{applyCdfGroupFields} \title{Applies a function to a list of fields of each group in a CDF structure} \description{ Applies a function to a list of fields of each group in a CDF structure. } \usage{ applyCdfGroupFields(cdf, fcn, ...) } \arguments{ \item{cdf}{A CDF \code{\link[base]{list}} structure.} \item{fcn}{A \code{\link[base]{function}} that takes a \code{\link[base]{list}} structure of fields and returns an updated \code{\link[base]{list}} of fields.} \item{...}{Arguments passed to the \code{fcn} function.} } \value{ Returns an updated CDF \code{\link[base]{list}} structure. } \author{Henrik Bengtsson} \seealso{ \code{\link{applyCdfGroups}}(). } \keyword{programming} affxparser/man/applyCdfGroups.Rd0000644000175200017520000001171314516003651017745 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % applyCdfGroups.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{applyCdfGroups} \alias{applyCdfGroups} \alias{applyCdfBlocks} \title{Applies a function over the groups in a CDF structure} \description{ Applies a function over the groups in a CDF structure. } \usage{ applyCdfGroups(cdf, fcn, ...) } \arguments{ \item{cdf}{A CDF \code{\link[base]{list}} structure.} \item{fcn}{A \code{\link[base]{function}} that takes a \code{\link[base]{list}} structure of group elements and returns an updated \code{\link[base]{list}} of groups.} \item{...}{Arguments passed to the \code{fcn} function.} } \value{ Returns an updated CDF \code{\link[base]{list}} structure. } \section{Pre-defined restructuring functions}{ \itemize{ \item{Generic:}{ \itemize{ \item \code{\link{cdfGetFields}}() - Gets a subset of groups fields in a CDF structure. 
\item \code{\link{cdfGetGroups}}() - Gets a subset of groups in a CDF structure. \item \code{\link{cdfOrderBy}}() - Orders the fields according to the value of another field in the same CDF group. \item \code{\link{cdfOrderColumnsBy}}() - Orders the columns of fields according to the values in a certain row of another field in the same CDF group. }} \item{Designed for SNP arrays:}{ \itemize{ \item \code{\link{cdfAddBaseMmCounts}}() - Adds the number of allele A and allele B mismatching nucleotides of the probes in a CDF structure. \item \code{\link{cdfAddProbeOffsets}}() - Adds probe offsets to the groups in a CDF structure. \item \code{\link{cdfGtypeCelToPQ}}() - Function to imitate Affymetrix' \code{gtype_cel_to_pq} software. \item \code{\link{cdfMergeAlleles}}() - Function to join CDF allele A and allele B groups strand by strand. \item \code{\link{cdfMergeStrands}}() - Function to join CDF groups with the same names. }} } We appreciate contributions. } \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## cdfFile <- findCdf("Mapping10K_Xba131") # Identify the unit index from the unit name unitName <- "SNP_A-1509436" unit <- which(readCdfUnitNames(cdfFile) == unitName) # Read the CDF file cdf0 <- readCdfUnits(cdfFile, units=unit, stratifyBy="pmmm", readType=FALSE, readDirection=FALSE) cat("Default CDF structure:\n") print(cdf0) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tabulate the information in each group # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cdf <- readCdfUnits(cdfFile, units=unit) cdf <- applyCdfGroups(cdf, lapply, as.data.frame) print(cdf) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Infer the (true or the relative) offset for probe quartets. 
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cdf <- applyCdfGroups(cdf0, cdfAddProbeOffsets) cat("Probe offsets:\n") print(cdf) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify the number of nucleotides that mismatch the # allele A and the allele B sequences, respectively. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cdf <- applyCdfGroups(cdf, cdfAddBaseMmCounts) cat("Allele A & B target sequence mismatch counts:\n") print(cdf) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Combine the signals from the sense and the anti-sense # strands in a SNP CEL files. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # First, join the strands in the CDF structure. cdf <- applyCdfGroups(cdf, cdfMergeStrands) cat("Joined CDF structure:\n") print(cdf) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Rearrange values of group fields into quartets. This # requires that the values are already arranged as PMs and MMs. 
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cdf <- applyCdfGroups(cdf0, cdfMergeAlleles) cat("Probe quartets:\n") print(cdf) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get the x and y cell locations (note, zero-based) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- unlist(applyCdfGroups(cdf, cdfGetFields, "x"), use.names=FALSE) y <- unlist(applyCdfGroups(cdf, cdfGetFields, "y"), use.names=FALSE) # Validate ncol <- readCdfHeader(cdfFile)$cols cells <- as.integer(y*ncol+x+1) cells <- sort(cells) cells0 <- readCdfCellIndices(cdfFile, units=unit) cells0 <- unlist(cells0, use.names=FALSE) cells0 <- sort(cells0) stopifnot(identical(cells0, cells)) ############################################################## } # STOP # ############################################################## } \author{Henrik Bengtsson} \keyword{programming} affxparser/man/arrangeCelFilesByChipType.Rd0000644000175200017520000000350014516003651021765 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % arrangeCelFilesByChipType.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{arrangeCelFilesByChipType} \alias{arrangeCelFilesByChipType} \title{Moves CEL files to subdirectories with names corresponding to the chip types} \description{ Moves CEL files to subdirectories with names corresponding to the chip types according to the CEL file headers. For instance, a HG_U95Av2 CEL file with pathname "data/foo.CEL" will be moved to subdirectory \code{celFiles/HG_U95Av2/}. } \usage{ arrangeCelFilesByChipType(pathnames=list.files(pattern = "[.](cel|CEL)$"), path="celFiles/", aliases=NULL, ...) 
} \arguments{ \item{pathnames}{A \code{\link[base]{character}} \code{\link[base]{vector}} of CEL pathnames to be moved.} \item{path}{A \code{\link[base]{character}} string specifying the root output directory, which in turn will contain chip-type subdirectories. All directories will be created, if missing.} \item{aliases}{A named \code{\link[base]{character}} string with chip type aliases. For instance, \code{aliases=c("Focus"="HG-Focus")} will treat a CEL file with chiptype label 'Focus' (early-access name) as if it was 'HG-Focus' (official name).} \item{...}{Not used.} } \value{ Returns (invisibly) a named \code{\link[base]{character}} \code{\link[base]{vector}} of the new pathnames with the chip types as the names. Files that could not be moved or where not valid CEL files are set to missing values. } \seealso{ The chip type is inferred from the CEL file header, cf. \code{\link{readCelHeader}}(). } \author{Henrik Bengtsson} \keyword{programming} \keyword{internal} affxparser/man/cdfAddBaseMmCounts.Rd0000644000175200017520000000572214516003651020434 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfAddBaseMmCounts.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfAddBaseMmCounts} \alias{cdfAddBaseMmCounts} \title{Adds the number of allele A and allele B mismatching nucleotides of the probes in a CDF structure} \description{ Adds the number of allele A and allele B mismatching nucleotides of the probes in a CDF structure. This \code{\link[base]{function}} is design to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. Identifies the number of nucleotides (bases) in probe sequences that mismatch the the target sequence for allele A and the allele B, as used by [1]. 
} \usage{ cdfAddBaseMmCounts(groups, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} structure with groups. Each group must contain the fields \code{tbase}, \code{pbase}, and \code{offset} (from \code{\link{cdfAddProbeOffsets}}()). } \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure with the same number of groups as the \code{groups} argument. To each group, two fields are added: \item{mmACount}{The number of nucleotides in the probe sequence that mismatch the target sequence of allele A.} \item{mmBCount}{The number of nucleotides in the probe sequence that mismatch the target sequence of allele B.} } \details{ Note that the above counts can be inferred from the CDF structure alone, i.e. no sequence information is required. Consider a probe group interrogating allele A. First, all PM probes match the allele A target sequence perfectly regardless of shift. Moreover, all these PM probes mismatch the allele B target sequence at exactly one position. Second, all MM probes mismatch the allele A sequence at exactly one position. This is also true for the allele B sequence, \emph{except} for an MM probe with zero offset, which only mismatches at one (the middle) position. For a probe group interrogating allele B, the same rules apply with labels A and B swapped. In summary, the mismatch counts for PM probes can take values 0 and 1, and for MM probes they can take values 0, 1, and 2. } \seealso{ To add required probe offsets, see \code{\link{cdfAddProbeOffsets}}(). \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \references{ [1] LaFramboise T, Weir BA, Zhao X, Beroukhim R, Li C, Harrington D, Sellers WR, and Meyerson M. \emph{Allele-specific amplification in cancer revealed by SNP array analysis}, PLoS Computational Biology, Nov 2005, Volume 1, Issue 6, e65.\cr [2] Affymetrix, \emph{Understanding Genotyping Probe Set Structure}, 2005.
\url{http://www.affymetrix.com/support/developer/whitepapers/genotyping_probe_set_structure.affx}\cr } \keyword{programming} \keyword{internal} affxparser/man/cdfAddPlasqTypes.Rd0000644000175200017520000000432114516003651020173 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfAddPlasqTypes.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfAddPlasqTypes} \alias{cdfAddPlasqTypes} \title{Adds the PLASQ types for the probes in a CDF structure} \description{ Adds the PLASQ types for the probes in a CDF structure. This \code{\link[base]{function}} is design to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. } \usage{ cdfAddPlasqTypes(groups, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} structure with groups. Each group must contain the fields \code{tbase}, \code{pbase}, and \code{expos}. } \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure with the same number of groups as the \code{groups} argument. To each group, one fields is added: \item{plasqType}{A \code{\link[base]{vector}} of \code{\link[base]{integer}}s in [0,15].} } \details{ This function identifies the number of nucleotides (bases) in probe sequences that mismatch the the target sequence for allele A and the allele B, as used by PLASQ [1], and adds an integer [0,15] interpreted as one of 16 probe types. In PLASQ these probe types are referred to as: 0=MMoBR, 1=MMoBF, 2=MMcBR, 3=MMcBF, 4=MMoAR, 5=MMoAF, 6=MMcAR, 7=MMcAF, 8=PMoBR, 9=PMoBF, 10=PMcBR, 11=PMcBF, 12=PMoAR, 13=PMoAF, 14=PMcAR, 15=PMcAF.\cr Pseudo rule for finding out the probe-type value:\cr \itemize{ \item PM/MM: For MMs add 0, for PMs add 8. \item A/B: For Bs add 0, for As add 4. 
\item o/c: For shifted (o) add 0, for centered (c) add 2. \item R/F: For antisense (R) add 0, for sense (F) add 1. } Example: (PM,A,c,R) = 8 + 4 + 2 + 0 = 14 (=PMcAR) } \author{Henrik Bengtsson} \references{ [1] LaFramboise T, Weir BA, Zhao X, Beroukhim R, Li C, Harrington D, Sellers WR, and Meyerson M. \emph{Allele-specific amplification in cancer revealed by SNP array analysis}, PLoS Computational Biology, Nov 2005, Volume 1, Issue 6, e65.\cr } \keyword{programming} \keyword{internal} affxparser/man/cdfAddProbeOffsets.Rd0000644000175200017520000000251714516003651020474 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfAddProbeOffsets.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfAddProbeOffsets} \alias{cdfAddProbeOffsets} \title{Adds probe offsets to the groups in a CDF structure} \description{ Adds probe offsets to the groups in a CDF structure. This \code{\link[base]{function}} is design to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. } \usage{ cdfAddProbeOffsets(groups, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} structure with groups. Each group must contain the fields \code{tbase}, and \code{expos}. } \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure with half the number of groups as the \code{groups} argument (since allele A and allele B groups have been joined). } \seealso{ \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \references{ [1] Affymetrix, \emph{Understanding Genotyping Probe Set Structure}, 2005. 
\url{http://www.affymetrix.com/support/developer/whitepapers/genotyping_probe_set_structure.affx}\cr } \keyword{programming} \keyword{internal} affxparser/man/cdfGetFields.Rd0000644000175200017520000000224214516003651017323 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfGetFields.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfGetFields} \alias{cdfGetFields} \title{Gets a subset of groups fields in a CDF structure} \description{ Gets a subset of groups fields in a CDF structure. This \code{\link[base]{function}} is designed to be used with \code{\link{applyCdfGroups}}(). } \usage{ cdfGetFields(groups, fields, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} of groups.} \item{fields}{A \code{\link[base]{character}} \code{\link[base]{vector}} of names of fields to be returned.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure of groups. } \details{ Note that an error is \emph{not} generated for missing fields. Instead the field is returned with value \code{\link[base]{NA}}. The reason for this is that it is much faster. } \seealso{ \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \keyword{programming} \keyword{internal} affxparser/man/cdfGetGroups.Rd0000644000175200017520000000174314516003651017401 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfGetGroups.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfGetGroups} \alias{cdfGetGroups} \title{Gets a subset of groups in a CDF structure} \description{ Gets a subset of groups in a CDF structure. 
This \code{\link[base]{function}} is designed to be used with \code{\link{applyCdfGroups}}(). } \usage{ cdfGetGroups(groups, which, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} of groups.} \item{which}{An \code{\link[base]{integer}} or \code{\link[base]{character}} \code{\link[base]{vector}} of groups to be returned.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure of groups. } \seealso{ \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \keyword{programming} \keyword{internal} affxparser/man/cdfGtypeCelToPQ.Rd0000644000175200017520000000260614516003651017741 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfGtypeCelToPQ.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfGtypeCelToPQ} \alias{cdfGtypeCelToPQ} \title{Function to imitate Affymetrix' gtype_cel_to_pq software} \description{ Function to imitate Affymetrix' gtype_cel_to_pq software. This \code{\link[base]{function}} is designed to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. } \usage{ cdfGtypeCelToPQ(groups, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} structure with groups.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure with a single group. The fields in this group are in turn vectors (all of equal length) where the elements are stored as subsequent quartets (PMA, MMA, PMB, MMB) with all forward-strand quartets first followed by all reverse-strand quartets. } \seealso{ \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \references{ [1] Affymetrix, \emph{Understanding Genotyping Probe Set Structure}, 2005. 
\url{http://www.affymetrix.com/support/developer/whitepapers/genotyping_probe_set_structure.affx}\cr } \keyword{programming} \keyword{internal} affxparser/man/cdfHeaderToCelHeader.Rd0000644000175200017520000000203614516003651020706 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfHeaderToCelHeader.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfHeaderToCelHeader} \alias{cdfHeaderToCelHeader} \title{Creates a valid CEL header from a CDF header} \description{ Creates a valid CEL header from a CDF header. } \usage{ cdfHeaderToCelHeader(cdfHeader, sampleName="noname", date=Sys.time(), ..., version="4") } \arguments{ \item{cdfHeader}{A CDF \code{\link[base]{list}} structure.} \item{sampleName}{The name of the sample to be added to the CEL header.} \item{date}{The (scan) date to be added to the CEL header.} \item{...}{Not used.} \item{version}{The file-format version of the generated CEL file. Currently only version 4 is supported.} } \value{ Returns a CDF \code{\link[base]{list}} structure. } \author{Henrik Bengtsson} \keyword{programming} \keyword{internal} affxparser/man/cdfMergeAlleles.Rd0000644000175200017520000000336414516003651020024 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfMergeAlleles.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfMergeAlleles} \alias{cdfMergeAlleles} \title{Function to join CDF allele A and allele B groups strand by strand} \description{ Function to join CDF allele A and allele B groups strand by strand. 
This \code{\link[base]{function}} is designed to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. } \usage{ cdfMergeAlleles(groups, compReverseBases=FALSE, collapse="", ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} structure with groups.} \item{compReverseBases}{If \code{\link[base:logical]{TRUE}}, the group names, which typically are names for bases, are turned into their complementary bases for the reverse strand.} \item{collapse}{The \code{\link[base]{character}} string used to collapse the allele A and the allele B group names.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure with the two groups \code{forward} and \code{reverse}, if the latter exists. } \details{ Allele A and allele B are merged into a \code{\link[base]{matrix}} where the first row holds the elements for allele A and the second row holds the elements for allele B. } \seealso{ \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \references{ [1] Affymetrix, \emph{Understanding Genotyping Probe Set Structure}, 2005. \url{http://www.affymetrix.com/support/developer/whitepapers/genotyping_probe_set_structure.affx}\cr } \keyword{programming} \keyword{internal} affxparser/man/cdfMergeStrands.Rd0000644000175200017520000000270414516003651020056 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfMergeStrands.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfMergeStrands} \alias{cdfMergeStrands} \title{Function to join CDF groups with the same names} \description{ Function to join CDF groups with the same names. This \code{\link[base]{function}} is designed to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. 
This can be used to join the sense and anti-sense groups of the same allele in SNP arrays. } \usage{ cdfMergeStrands(groups, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} structure with groups.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure with only two groups. } \details{ If a unit has two strands, they are merged such that the elements for the second strand are concatenated to the end of the elements of first strand (This is done separately for the two alleles). } \seealso{ \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \references{ [1] Affymetrix, \emph{Understanding Genotyping Probe Set Structure}, 2005. \url{http://www.affymetrix.com/support/developer/whitepapers/genotyping_probe_set_structure.affx}\cr } \keyword{programming} \keyword{internal} affxparser/man/cdfMergeToQuartets.Rd0000644000175200017520000000247114516003651020554 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfMergeToQuartets.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfMergeToQuartets} \alias{cdfMergeToQuartets} \title{Function to re-arrange CDF groups values in quartets} \description{ Function to re-arrange CDF groups values in quartets. This \code{\link[base]{function}} is design to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. Note, this requires that the group values have already been arranged in PMs and MMs. } \usage{ cdfMergeToQuartets(groups, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} structure with groups.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure with the two groups \code{forward} and \code{reverse}, if the latter exists. } \seealso{ \code{\link{applyCdfGroups}}(). 
} \author{Henrik Bengtsson} \references{ [1] Affymetrix, \emph{Understanding Genotyping Probe Set Structure}, 2005. \url{http://www.affymetrix.com/support/developer/whitepapers/genotyping_probe_set_structure.affx}\cr } \keyword{programming} \keyword{internal} affxparser/man/cdfOrderBy.Rd0000644000175200017520000000220414516003651017021 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfOrderBy.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfOrderBy} \alias{cdfOrderBy} \title{Orders the fields according to the value of another field in the same CDF group} \description{ Orders the fields according to the value of another field in the same CDF group. This \code{\link[base]{function}} is design to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. } \usage{ cdfOrderBy(groups, field, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} of groups.} \item{field}{The field whose values are used to order the other fields.} \item{...}{Optional arguments passed \code{\link[base]{order}}().} } \value{ Returns a \code{\link[base]{list}} structure of groups. } \seealso{ \code{\link{cdfOrderColumnsBy}}(). \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \keyword{programming} \keyword{internal} affxparser/man/cdfOrderColumnsBy.Rd0000644000175200017520000000262214516003651020366 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfOrderColumnsBy.R % % by the Rdoc compiler part of the R.oo package. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfOrderColumnsBy} \alias{cdfOrderColumnsBy} \title{Orders the columns of fields according to the values in a certain row of another field in the same CDF group} \description{ Orders the columns of fields according to the values in a certain row of another field in the same CDF group. Note that this method requires that the group fields are matrices. This \code{\link[base]{function}} is design to be used with \code{\link{applyCdfGroups}}() on an Affymetrix Mapping (SNP) CDF \code{\link[base]{list}} structure. } \usage{ cdfOrderColumnsBy(groups, field, row=1, ...) } \arguments{ \item{groups}{A \code{\link[base]{list}} of groups.} \item{field}{The field whose values in row \code{row} are used to order the other fields.} \item{row}{The row of the above field to be used to find the order.} \item{...}{Optional arguments passed \code{\link[base]{order}}().} } \value{ Returns a \code{\link[base]{list}} structure of groups. } \seealso{ \code{\link{cdfOrderBy}}(). \code{\link{applyCdfGroups}}(). } \author{Henrik Bengtsson} \keyword{programming} \keyword{internal} affxparser/man/cdfSetDimension.Rd0000644000175200017520000000174014516003651020060 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % cdfSetDimension.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{cdfSetDimension} \alias{cdfSetDimension} \title{Sets the dimension of an object} \description{ Sets the dimension of an object. This \code{\link[base]{function}} is designed to be used with \code{\link{applyCdfGroupFields}}(). } \usage{ cdfSetDimension(field, dim, ...) 
} \arguments{ \item{field}{The field (object) whose dimension is to be set.} \item{dim}{The dimension to be assigned to \code{field}.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{list}} structure of groups. } \seealso{ \code{\link{applyCdfGroupFields}}(). } \author{Henrik Bengtsson} \keyword{programming} \keyword{internal} affxparser/man/compareCdfs.Rd0000644000175200017520000000264114516003651017231 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % compareCdfs.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{compareCdfs} \alias{compareCdfs} \title{Compares the contents of two CDF files} \usage{ compareCdfs(pathname, other, quick=FALSE, verbose=0, ...) } \description{ Compares the contents of two CDF files. } \arguments{ \item{pathname}{The pathname of the first CDF file.} \item{other}{The pathname of the second CDF file.} \item{quick}{If \code{\link[base:logical]{TRUE}}, only a subset of the units are compared, otherwise all units are compared.} \item{verbose}{An \code{\link[base]{integer}}. The larger the more details are printed.} \item{...}{Not used.} } \value{ Returns \code{\link[base:logical]{TRUE}} if the two CDFs are equal, otherwise \code{\link[base:logical]{FALSE}}. If \code{\link[base:logical]{FALSE}}, the attribute \code{reason} contains a string explaining what difference was detected, and the attributes \code{value1} and \code{value2} contain the two objects/values that differ. } \details{ The comparison is done with an upper-limit memory usage, regardless of the size of the CDFs. } \author{Henrik Bengtsson} \seealso{ \code{\link{convertCdf}}(). 
} \keyword{file} \keyword{IO} affxparser/man/compareCels.Rd0000644000175200017520000000251214516003651017235 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % compareCels.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{compareCels} \alias{compareCels} \title{Compares the contents of two CEL files} \usage{ compareCels(pathname, other, readMap=NULL, otherReadMap=NULL, verbose=0, ...) } \description{ Compares the contents of two CEL files. } \arguments{ \item{pathname}{The pathname of the first CEL file.} \item{other}{The pathname of the second CEL file.} \item{readMap}{An optional read map for the first CEL file.} \item{otherReadMap}{An optional read map for the second CEL file.} \item{verbose}{An \code{\link[base]{integer}}. The larger the more details are printed.} \item{...}{Not used.} } \value{ Returns \code{\link[base:logical]{TRUE}} if the two CELs are equal, otherwise \code{\link[base:logical]{FALSE}}. If \code{\link[base:logical]{FALSE}}, the attribute \code{reason} contains a string explaining what difference was detected, and the attributes \code{value1} and \code{value2} contain the two objects/values that differ. } \author{Henrik Bengtsson} \seealso{ \code{\link{convertCel}}(). } \keyword{file} \keyword{IO} affxparser/man/convertCdf.Rd0000644000175200017520000000620014516003651017073 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % convertCdf.R % % by the Rdoc compiler part of the R.oo package. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{convertCdf} \alias{convertCdf} \title{Converts a CDF into the same CDF but with another format} \usage{ convertCdf(filename, outFilename, version="4", force=FALSE, ..., .validate=TRUE, verbose=FALSE) } \description{ Converts a CDF into the same CDF but with another format. Currently only CDF files in version 4 (binary/XDA) can be written. However, any input format is recognized. } \arguments{ \item{filename}{The pathname of the original CDF file.} \item{outFilename}{The pathname of the destination CDF file. If the same as the source file, an exception is thrown.} \item{version}{The version of the output file format.} \item{force}{If \code{\link[base:logical]{FALSE}}, and the version of the original CDF is the same as the output version, the new CDF will not be generated, otherwise it will.} \item{...}{Not used.} \item{.validate}{If \code{\link[base:logical]{TRUE}}, a consistency test between the generated and the original CDF is performed. Note that the memory overhead for this can be quite large, because two complete CDF structures are kept in memory at the same time.} \item{verbose}{If \code{\link[base:logical]{TRUE}}, extra details are written while processing.} } \value{ Returns (invisibly) \code{\link[base:logical]{TRUE}} if a new CDF was generated, otherwise \code{\link[base:logical]{FALSE}}. } \section{Benchmarking of ASCII and binary CDFs}{ Binary CDFs are much faster to read than ASCII CDFs. Here are some example for reading complete CDFs (the difference is even larger when reading CDFs in subsets): \itemize{ \item HG-U133A (22283 units): ASCII 11.7s (9.3x), binary 1.20s (1x). \item Hu6800 (7129 units): ASCII 3.5s (6.1x), binary 0.57s (1x). } } \section{Confirmed conversions to binary (XDA) CDFs}{ The following chip types have been converted using \code{convertCdf()} and then verified for correctness using \code{compareCdfs()}: ASCII-to-binary: HG-U133A, Hu6800. 
Binary-to-binary: Test3. } \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## chipType <- "Test3" cdfFiles <- findCdf(chipType, firstOnly=FALSE) cdfFiles <- list( ASCII=grep("ASCII", cdfFiles, value=TRUE), XDA=grep("XDA", cdfFiles, value=TRUE) ) outFile <- file.path(tempdir(), sprintf("\%s.cdf", chipType)) convertCdf(cdfFiles$ASCII, outFile, verbose=TRUE) ############################################################## } # STOP # ############################################################## } \author{Henrik Bengtsson} \seealso{ See \code{\link{compareCdfs}}() to compare two CDF files. \code{\link{writeCdf}}(). } \keyword{file} \keyword{IO} affxparser/man/convertCel.Rd0000644000175200017520000000656314516003651017116 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % convertCel.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{convertCel} \alias{convertCel} \title{Converts a CEL into the same CEL but with another format} \usage{ convertCel(filename, outFilename, readMap=NULL, writeMap=NULL, version="4", newChipType=NULL, ..., .validate=FALSE, verbose=FALSE) } \description{ Converts a CEL into the same CEL but with another format. Currently only CEL files in version 4 (binary/XDA) can be written. However, any input format is recognized. } \arguments{ \item{filename}{The pathname of the original CEL file.} \item{outFilename}{The pathname of the destination CEL file. 
If the same as the source file, an exception is thrown.} \item{readMap}{An optional read map for the input CEL file.} \item{writeMap}{An optional write map for the output CEL file.} \item{version}{The version of the output file format.} \item{newChipType}{(Only for advanced users who fully understand the Affymetrix CEL file format!) An optional string for overriding the chip type (label) in the CEL file header.} \item{...}{Not used.} \item{.validate}{If \code{\link[base:logical]{TRUE}}, a consistency test between the generated and the original CEL is performed.} \item{verbose}{If \code{\link[base:logical]{TRUE}}, extra details are written while processing.} } \value{ Returns (invisibly) \code{\link[base:logical]{TRUE}} if a new CEL was generated, otherwise \code{\link[base:logical]{FALSE}}. } \section{Benchmarking of ASCII and binary CELs}{ Binary CELs are much faster to read than ASCII CELs. Here are some examples for reading complete CELs (the difference is even larger when reading CELs in subsets): \itemize{ \item To do } } \section{WARNING: Changing the chip type label}{ The \code{newChipType} argument changes the label in the part of the DAT header that specifies the chip type of the CEL file. Note that it does not change anything else in the CEL file. This type of relabeling is valid for updating the chip type \emph{label} of CEL files that were generated during, say, an "Early Access" period leading to a different chip type label than what more recent CEL files of the same physical chip type have. 
} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Search for some available Calvin CEL files path <- system.file("rawData", package="AffymetrixDataTestFiles") files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE, firstOnly=FALSE) files <- grep("FusionSDK_Test3", files, value=TRUE) files <- grep("Calvin", files, value=TRUE) file <- files[1] outFile <- file.path(tempdir(), gsub("[.]CEL$", ",XBA.CEL", basename(file))) if (file.exists(outFile)) file.remove(outFile) convertCel(file, outFile, .validate=TRUE) ############################################################## } # STOP # ############################################################## } \author{Henrik Bengtsson} \seealso{ \code{\link{createCel}}(). } \keyword{file} \keyword{IO} affxparser/man/copyCel.Rd0000644000175200017520000000204114516003651016373 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % copyCel.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{copyCel} \alias{copyCel} \title{Copies a CEL file} \description{ Copies a CEL file. The file must be a valid CEL file, if not an exception is thrown. } \usage{ copyCel(from, to, overwrite=FALSE, ...) } \arguments{ \item{from}{The filename of the CEL file to be copied.} \item{to}{The filename of destination file.} \item{overwrite}{If \code{\link[base:logical]{FALSE}} and the destination file already exists, an exception is thrown, otherwise not.} \item{...}{Not used.} } \value{ Return \code{\link[base:logical]{TRUE}} if file was successfully copied, otherwise \code{\link[base:logical]{FALSE}}. } \seealso{ \code{\link{isCelFile}}(). 
} \author{Henrik Bengtsson} \keyword{programming} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/createCel.Rd0000644000175200017520000000705314516003651016674 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % createCel.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{createCel} \alias{createCel} \title{Creates an empty CEL file} \usage{ createCel(filename, header, nsubgrids=0, overwrite=FALSE, ..., cdf=NULL, verbose=FALSE) } \description{ Creates an empty CEL file. } \arguments{ \item{filename}{The filename of the CEL file to be created.} \item{header}{A \code{\link[base]{list}} structure describing the CEL header, similar to the structure returned by \code{\link{readCelHeader}}(). This header can be of any CEL header version.} \item{overwrite}{If \code{\link[base:logical]{FALSE}} and the file already exists, an exception is thrown, otherwise the file is created.} \item{nsubgrids}{The number of subgrids.} \item{...}{Not used.} \item{cdf}{(optional) The pathname of a CDF file for the CEL file to be created. If given, the CEL header (argument \code{header}) is validated against the CDF header, otherwise not. If \code{\link[base:logical]{TRUE}}, a CDF file is located automatically based using \code{findCdf(header$chiptype)}. } \item{verbose}{An \code{\link[base]{integer}} specifying how much verbose details are outputted.} } \value{ Returns (invisibly) the pathname of the file created. } \details{ Currently only binary (v4) CEL files are supported. The current version of the method does not make use of the Fusion SDK, but its own code to create the CEL file. } \section{Redundant fields in the CEL header}{ There are a few redundant fields in the CEL header. To make sure the CEL header is consistent, redundant fields are cleared and regenerated. 
For instance, the field for the total number of cells is calculated from the number of cell rows and columns. } \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Search for first available ASCII CEL file path <- system.file("rawData", package="AffymetrixDataTestFiles") files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE, firstOnly=FALSE) files <- grep("ASCII", files, value=TRUE) file <- files[1] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Read the CEL header # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - hdr <- readCelHeader(file) # Assert that we found an ASCII CEL file, but any will do stopifnot(hdr$version == 3) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Create a CEL v4 file of the same chip type # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - outFile <- file.path(tempdir(), "zzz.CEL") if (file.exists(outFile)) file.remove(outFile) createCel(outFile, hdr, overwrite=TRUE) str(readCelHeader(outFile)) # Verify correctness by update and re-read a few cells intensities <- as.double(1:100) indices <- seq(along=intensities) updateCel(outFile, indices=indices, intensities=intensities) value <- readCel(outFile, indices=indices)$intensities stopifnot(identical(intensities, value)) ############################################################## } # STOP # ############################################################## } \author{Henrik Bengtsson} \keyword{file} \keyword{IO} affxparser/man/findCdf.Rd0000644000175200017520000000564514516003651016347 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % findCdf.R % % by the Rdoc compiler part of the R.oo package. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{findCdf} \alias{findCdf} \title{Search for CDF files in multiple directories} \description{ Search for CDF files in multiple directories. } \usage{ findCdf(chipType=NULL, paths=NULL, recursive=TRUE, pattern="[.](c|C)(d|D)(f|F)$", ...) } \arguments{ \item{chipType}{A \code{\link[base]{character}} string of the chip type to search for.} \item{paths}{A \code{\link[base]{character}} \code{\link[base]{vector}} of paths to be searched. The current directory is always searched at the beginning. If \code{\link[base]{NULL}}, default paths are searched. For more details, see below.} \item{recursive}{If \code{\link[base:logical]{TRUE}}, directories are searched recursively.} \item{pattern}{A regular expression file name pattern to match.} \item{...}{Additional arguments passed to \code{\link{findFiles}}().} } \value{ Returns a \code{\link[base]{vector}} of the full pathnames of the files found. } \details{ Note, the current directory is always searched first, but never recursively (unless it is added to the search path explicitly). This provides an easy way to override other files in the search path. If \code{paths} is \code{\link[base]{NULL}}, then a set of default paths are searched. The default search path constitutes: \enumerate{ \item \code{getOption("AFFX_CDF_PATH")} \item \code{Sys.getenv("AFFX_CDF_PATH")} } One of the easiest ways to set system variables for \R is to set them in an \code{.Renviron} file, e.g. \preformatted{ # affxparser: Set default CDF path AFFX_CDF_PATH=${AFFX_CDF_PATH};M:/Affymetrix_2004-100k_trios/cdf AFFX_CDF_PATH=${AFFX_CDF_PATH};M:/Affymetrix_2005-500k_data/cdf } See \code{\link[base]{Startup}} for more details. 
} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Find a specific CDF file cdfFile <- findCdf("Mapping10K_Xba131") print(cdfFile) # Find the first CDF file (no matter what it is) cdfFile <- findCdf() print(cdfFile) # Find all CDF files in search path and display their headers cdfFiles <- findCdf(firstOnly=FALSE) for (cdfFile in cdfFiles) { cat("=======================================\n") hdr <- readCdfHeader(cdfFile) str(hdr) } ############################################################## } # STOP # ############################################################## } \seealso{ This method is used internally by \code{\link{readCelUnits}}() if the CDF file is not specified. } \author{Henrik Bengtsson} \keyword{file} \keyword{IO} affxparser/man/findFiles.Rd0000644000175200017520000000337414516003651016712 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % findFiles.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{findFiles} \alias{findFiles} \title{Finds one or several files in multiple directories} \description{ Finds one or several files in multiple directories. } \usage{ findFiles(pattern=NULL, paths=NULL, recursive=FALSE, firstOnly=TRUE, allFiles=TRUE, ...) 
} \arguments{ \item{pattern}{A regular expression file name pattern to match.} \item{paths}{A \code{\link[base]{character}} \code{\link[base]{vector}} of paths to be searched.} \item{recursive}{If \code{\link[base:logical]{TRUE}}, the directory structure is searched breadth-first, in lexicographic order.} \item{firstOnly}{If \code{\link[base:logical]{TRUE}}, the method returns as soon as a matching file is found, otherwise not.} \item{allFiles}{If \code{\link[base:logical]{FALSE}}, files and directories starting with a period will be skipped, otherwise not.} \item{...}{Arguments passed to \code{\link[base]{list.files}}().} } \value{ Returns a \code{\link[base]{vector}} of the full pathnames of the files found. } \section{Paths}{ The \code{paths} argument may also contain paths specified as semi-colon (\code{";"}) separated paths, e.g. \code{"/usr/;usr/bin/;.;"}. } \section{Windows Shortcut links}{ If package \pkg{R.utils} is available and loaded, Windows Shortcut links (*.lnk) are recognized and can be used to imitate links to directories elsewhere. For more details, see \code{\link[R.utils]{filePath}}. } \author{Henrik Bengtsson} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/invertMap.Rd0000644000175200017520000000441314516003651016747 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % invertMap.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{invertMap} \alias{invertMap} \title{Inverts a read or a write map} \usage{ invertMap(map, ...) } \description{ Inverts a read or a write map. } \arguments{ \item{map}{An \code{\link[base]{integer}} \code{\link[base]{vector}}.} \item{...}{Not used.} } \value{ Returns an \code{\link[base]{integer}} \code{\link[base]{vector}}. 
} \details{ A map is defined to be a \code{\link[base]{vector}} of length \emph{n} with unique finite values in \eqn{[1,n]}. Finding the inverse of a map is the same as finding the rank of each element, cf. \code{\link[base]{order}}(). However, this method is much faster, because it utilizes the fact that all values are unique and in \eqn{[1,n]}. Moreover, for any map it holds that taking the inverse twice will result in the same map. } \examples{ set.seed(1) # Simulate a read map for a chip with 1.2 million cells nbrOfCells <- 1200000 readMap <- sample(nbrOfCells) # Get the corresponding write map writeMap <- invertMap(readMap) # A map inverted twice should be equal to itself stopifnot(identical(invertMap(writeMap), readMap)) # Another example illustrating that the write map is the # inverse of the read map idx <- sample(nbrOfCells, size=1000) stopifnot(identical(writeMap[readMap[idx]], idx)) # invertMap() is much faster than order() t1 <- system.time(invertMap(readMap))[3] cat(sprintf("invertMap() : \%5.2fs [ 1.00x]\n", t1)) t2 <- system.time(writeMap2 <- sort.list(readMap, na.last=NA, method="quick"))[3] cat(sprintf("'quick sort' : \%5.2fs [\%5.2fx]\n", t2, t2/t1)) stopifnot(identical(writeMap, writeMap2)) t3 <- system.time(writeMap2 <- order(readMap))[3] cat(sprintf("order() : \%5.2fs [\%5.2fx]\n", t3, t3/t1)) stopifnot(identical(writeMap, writeMap2)) # Clean up rm(nbrOfCells, idx, readMap, writeMap, writeMap2) } \author{Henrik Bengtsson} \seealso{ To generate an optimized write map for a CDF file, see \code{\link{readCdfUnitsWriteMap}}(). } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/isCelFile.Rd0000644000175200017520000000172514516003651016644 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % isCelFile.R % % by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{isCelFile} \alias{isCelFile} \title{Checks if a file is a CEL file or not} \description{ Checks if a file is a CEL file or not. } \usage{ isCelFile(filename, ...) } \arguments{ \item{filename}{A filename.} \item{...}{Not used.} } \value{ Returns \code{\link[base:logical]{TRUE}} if a CEL file, otherwise \code{\link[base:logical]{FALSE}}. ASCII (v3), binary (v4;XDA), and binary (CCG v1;Calvin) CEL files are recognized. If file does not exist, an exception is thrown. } \seealso{ \code{\link{readCel}}(), \code{\link{readCelHeader}}(), \code{\link{readCelUnits}}(). } \author{Henrik Bengtsson} \keyword{programming} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/parseDatHeaderString.Rd0000644000175200017520000000176314516003651021052 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % parseDatHeaderString.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{parseDatHeaderString} \alias{parseDatHeaderString} \title{Parses a DAT header string} \description{ Parses a DAT header string. } \usage{ parseDatHeaderString(header, timeFormat="\%m/\%d/\%y \%H:\%M:\%S", ...) } \arguments{ \item{header}{A \code{\link[base]{character}} string.} \item{timeFormat}{The format string used to parse the timestamp. For more details, see \code{\link[base:strptime]{strptime}()}. If \code{\link[base]{NULL}}, no parsing is done.} \item{...}{Not used.} } \value{ Returns named \code{\link[base]{list}} structure. } \seealso{ \code{\link{readCelHeader}}(). 
} \author{Henrik Bengtsson} \keyword{programming} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/readBpmap.Rd0000644000175200017520000000454214516003651016700 0ustar00biocbuildbiocbuild\name{readBpmap} \alias{readBpmap} \alias{readBpmapHeader} \alias{readBpmapSeqinfo} \title{Parses a Bpmap file} \description{ Parses (parts of) a Bpmap (binary probe mapping) file from Affymetrix. } \usage{ readBpmap(filename, seqIndices = NULL, readProbeSeq = TRUE, readSeqInfo = TRUE, readPMXY = TRUE, readMMXY = TRUE, readStartPos = TRUE, readCenterPos = FALSE, readStrand = TRUE, readMatchScore = FALSE, readProbeLength = FALSE, verbose = 0) readBpmapHeader(filename) readBpmapSeqinfo(filename, seqIndices = NULL, verbose = 0) } \arguments{ \item{filename}{The filename as a character.} \item{seqIndices}{A vector of integers, detailing the indices of the sequences being read. If \code{NULL}, the entire file is being read.} \item{readProbeSeq}{Do we read the probe sequences.} \item{readSeqInfo}{Do we read the sequence information (a list containing information such as sequence name, number of hits etc.)} \item{readPMXY}{Do we read the (x,y) coordinates of the PM-probes.} \item{readMMXY}{Do we read the (x,y) coordinates of the MM-probes (only relevant if the file has MM information)} \item{readStartPos}{Do we read the start position of the probes.} \item{readCenterPos}{Do we return the center position of the probes.} \item{readStrand}{Do we return the strand of the hits.} \item{readMatchScore}{Do we return the matchscore.} \item{readProbeLength}{Do we return the probe length.} \item{verbose}{How verbose do we want to be.} } \details{ \code{readBpmap} reads a BPMAP file, which is a binary file containing information about a given probe's location in a sequence. Here sequence means some kind of reference sequence, typically a chromosome or a scaffold.
\code{readBpmapHeader} reads the header of the BPMAP file, and \code{readBpmapSeqinfo} reads the sequence info of the sequences (so this function is merely a convenience function). } \value{ For \code{readBpmap}: A list of lists, one list for every sequence read. The components of the sequence lists depend on the arguments of the function call. For \code{readBpmapHeader} a list with two components \code{version} and \code{numSequences}. For \code{readBpmapSeqinfo} a list of lists containing the sequence info. } \author{Kasper Daniel Hansen} \seealso{\code{\link{tpmap2bpmap}} for information on how to write Bpmap files.} \keyword{file} \keyword{IO} affxparser/man/readCcg.Rd0000644000175200017520000000465414516003651016341 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCcg.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCcg} \alias{readCcg} \title{Reads an Affymetrix Command Console Generic (CCG) Data file} \usage{ readCcg(pathname, verbose=0, .filter=NULL, ...) } \description{ Reads an Affymetrix Command Console Generic (CCG) Data file. The CCG data file format is also known as the Calvin file format. } \arguments{ \item{pathname}{The pathname of the CCG file.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. The higher numbers, the more details.} \item{.filter}{A \code{\link[base]{list}}.} \item{...}{Not used.} } \value{ A named \code{\link[base]{list}} structure consisting of ... } \details{ Note, the current implementation of this method does not utilize the Affymetrix Fusion SDK library. Instead, it is implemented in R from the file format definition [1].
} \section{About the CCG file format}{ A CCG file consists of a "file header", a "generic data header", and a "data" section, as outlined here: \itemize{ \item File Header \item Generic Data Header (for the file) \enumerate{ \item Generic Data Header (for the file's 1st parent) \enumerate{ \item Generic Data Header (for the file's 1st parent's 1st parent) \item Generic Data Header (for the file's 1st parent's 2nd parent) \item ... \item Generic Data Header (for the file's 1st parent's Mth parent) } \item Generic Data Header (for the file's 2nd parent) \item ... \item Generic Data Header (for the file's Nth parent) } \item Data \enumerate{ \item Data Group #1 \enumerate{ \item Data Set #1 \itemize{ \item Parameters \item Column definitions \item Matrix of data } \item Data Set #2 \item ... \item Data Set #L } \item Data Group #2 \item ... \item Data Group #K } } } \author{Henrik Bengtsson} \seealso{ \code{\link{readCcgHeader}}(). \code{\link{readCdfUnits}}(). } \references{ [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, April, 2006. \url{http://www.affymetrix.com/support/developer/}\cr } \keyword{file} \keyword{IO} affxparser/man/readCcgHeader.Rd0000644000175200017520000000263314516003651017445 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCcgHeader.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCcgHeader} \alias{readCcgHeader} \title{Reads the header of an Affymetrix Command Console Generic (CCG) file} \usage{ readCcgHeader(pathname, verbose=0, .filter=list(fileHeader = TRUE, dataHeader = TRUE), ...) } \description{ Reads the header of an Affymetrix Command Console Generic (CCG) file. } \arguments{ \item{pathname}{The pathname of the CCG file.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level.
If 0, the file is parsed quietly. The higher numbers, the more details.} \item{.filter}{A \code{\link[base]{list}}.} \item{...}{Not used.} } \value{ A named \code{\link[base]{list}} structure consisting of ... } \author{Henrik Bengtsson} \details{ Note, the current implementation of this method does not utilize the Affymetrix Fusion SDK library. Instead, it is implemented in R from the file format definition [1]. } \seealso{ \code{\link{readCcg}}(). } \references{ [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, April, 2006. \url{http://www.affymetrix.com/support/developer/}\cr } \keyword{file} \keyword{IO} affxparser/man/readCdf.Rd0000644000175200017520000001205214516003651016330 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdf.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdf} \alias{readCdf} \title{Parsing a CDF file using Affymetrix Fusion SDK} \description{ Parsing a CDF file using Affymetrix Fusion SDK. This function parses a CDF file using the Affymetrix Fusion SDK. \emph{This function will most likely be replaced by the more general \code{\link{readCdfUnits}()} function.} } \usage{ readCdf(filename, units=NULL, readXY=TRUE, readBases=TRUE, readIndexpos=TRUE, readAtoms=TRUE, readUnitType=TRUE, readUnitDirection=TRUE, readUnitNumber=TRUE, readUnitAtomNumbers=TRUE, readGroupAtomNumbers=TRUE, readGroupDirection=TRUE, readIndices=FALSE, readIsPm=FALSE, stratifyBy=c("nothing", "pmmm", "pm", "mm"), verbose=0) } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read.
If \code{\link[base]{NULL}}, all units are read.} \item{readXY}{If \code{\link[base:logical]{TRUE}}, cell row and column (x,y) coordinates are retrieved, otherwise not.} \item{readBases}{If \code{\link[base:logical]{TRUE}}, cell P and T bases are retrieved, otherwise not.} \item{readIndexpos}{If \code{\link[base:logical]{TRUE}}, cell indexpos are retrieved, otherwise not.} \item{readExpos}{If \code{\link[base:logical]{TRUE}}, cell "expos" values are retrieved, otherwise not.} \item{readUnitType}{If \code{\link[base:logical]{TRUE}}, unit types are retrieved, otherwise not.} \item{readUnitDirection}{If \code{\link[base:logical]{TRUE}}, unit directions are retrieved, otherwise not.} \item{readUnitNumber}{If \code{\link[base:logical]{TRUE}}, unit numbers are retrieved, otherwise not.} \item{readUnitAtomNumbers}{If \code{\link[base:logical]{TRUE}}, unit atom numbers are retrieved, otherwise not.} \item{readGroupAtomNumbers}{If \code{\link[base:logical]{TRUE}}, group atom numbers are retrieved, otherwise not.} \item{readGroupDirection}{If \code{\link[base:logical]{TRUE}}, group directions are retrieved, otherwise not.} \item{readIndices}{If \code{\link[base:logical]{TRUE}}, cell indices \emph{calculated} from the row and column (x,y) coordinates are retrieved, otherwise not. Note that these indices are \emph{one-based}.} \item{readIsPm}{If \code{\link[base:logical]{TRUE}}, cell flags indicating whether the cell is a perfect-match (PM) probe or not are retrieved, otherwise not.} \item{stratifyBy}{A \code{\link[base]{character}} string specifying which and how elements in group fields are returned. If \code{"nothing"}, elements are returned as is, i.e. as \code{\link[base]{vector}}s. If \code{"pm"}/\code{"mm"}, only elements corresponding to perfect-match (PM) / mismatch (MM) probes are returned (as \code{\link[base]{vector}}s). 
If \code{"pmmm"}, elements are returned as a matrix where the first row holds elements corresponding to PM probes and the second corresponding to MM probes. Note that in this case, it is assumed that there are equal number of PMs and MMs; if not, an error is generated. Moreover, the PMs and MMs may not even be paired, i.e. there is no guarantee that the two elements in a column corresponds to a PM-MM pair.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. The higher numbers, the more details.} } \value{ A list with one component for each unit. Every component is again a list with three components \item{groups}{This is again a list with one component for each group (also called block). The information on each group is a list with 5 components, \code{x}, \code{y}, \code{pbase}, \code{tbase}, \code{expos}.} \item{type}{type of the unit.} \item{direction}{direction of the unit.} } \note{ This version of the function does not return information on the QC probes. This will be added in a (near) future release. In addition we expect the header to be part of the returned object. So expect changes to the structure of the value of the function in next release. Please contact the developers for details. } \section{Cell indices are one-based}{ Note that in \pkg{affxparser} all \emph{cell indices} are by convention \emph{one-based}, which is more convenient to work with in \R. For more details on one-based indices, see \code{\link{2. Cell coordinates and cell indices}}. } \author{ James Bullard and Kasper Daniel Hansen. } \seealso{ It is recommended to use \code{\link{readCdfUnits}}() instead of this method. \code{\link{readCdfHeader}}() for getting the header of a CDF file. } \references{ [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, June 14, 2005. 
\url{http://www.affymetrix.com/support/developer/} } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/readCdfCellIndices.Rd0000644000175200017520000000577614516003651020446 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdfCellIndices.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdfCellIndices} \alias{readCdfCellIndices} \title{Reads (one-based) cell indices of units (probesets) in an Affymetrix CDF file} \usage{ readCdfCellIndices(filename, units=NULL, stratifyBy=c("nothing", "pmmm", "pm", "mm"), verbose=0) } \description{ Reads (one-based) cell indices of units (probesets) in an Affymetrix CDF file. } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read. If \code{\link[base]{NULL}}, all units are read.} \item{stratifyBy}{A \code{\link[base]{character}} string specifying which and how elements in group fields are returned. If \code{"nothing"}, elements are returned as is, i.e. as \code{\link[base]{vector}}s. If \code{"pm"}/\code{"mm"}, only elements corresponding to perfect-match (PM) / mismatch (MM) probes are returned (as \code{\link[base]{vector}}s). If \code{"pmmm"}, elements are returned as a matrix where the first row holds elements corresponding to PM probes and the second corresponding to MM probes. Note that in this case, it is assumed that there are equal number of PMs and MMs; if not, an error is generated. Moreover, the PMs and MMs may not even be paired, i.e. there is no guarantee that the two elements in a column corresponds to a PM-MM pair.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. 
The higher numbers, the more details.} } \value{ A named \code{\link[base]{list}} where the names correspond to the names of the units read. Each unit element of the list is in turn a \code{\link[base]{list}} structure with one element \code{groups} which in turn is a \code{\link[base]{list}}. Each group element in \code{groups} is a \code{\link[base]{list}} with a single field named \code{indices}. Thus, the structure is \preformatted{ cdf +- unit #1 | +- "groups" | +- group #1 | | +- "indices" | | group #2 | | +- "indices" | . | +- group #K | +- "indices" +- unit #2 . +- unit #J } This structure is compatible with what \code{\link{readCdfUnits}}() returns. Note that these indices are \emph{one-based}. } \section{Cell indices are one-based}{ Note that in \pkg{affxparser} all \emph{cell indices} are by convention \emph{one-based}, which is more convenient to work with in \R. For more details on one-based indices, see \code{\link{2. Cell coordinates and cell indices}}. } \author{Henrik Bengtsson} \seealso{ \code{\link{readCdfUnits}}(). } \keyword{file} \keyword{IO} affxparser/man/readCdfDataFrame.Rd0000644000175200017520000000601314516003651020075 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdfDataFrame.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdfDataFrame} \alias{readCdfDataFrame} \title{Reads units (probesets) from an Affymetrix CDF file} \usage{ readCdfDataFrame(filename, units=NULL, groups=NULL, cells=NULL, fields=NULL, drop=TRUE, verbose=0) } \description{ Reads units (probesets) from an Affymetrix CDF file. Gets all or a subset of units (probesets). } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read.
If \code{\link[base]{NULL}}, all are read.} \item{groups}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of group indices specifying which groups to be read. If \code{\link[base]{NULL}}, all are read.} \item{cells}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of cell indices specifying which cells to be read. If \code{\link[base]{NULL}}, all are read.} \item{fields}{A \code{\link[base]{character}} \code{\link[base]{vector}} specifying what fields to read. If \code{\link[base]{NULL}}, all unit, group and cell fields are returned.} \item{drop}{If \code{\link[base:logical]{TRUE}} and only one field is read, then a \code{\link[base]{vector}} (rather than a single-column \code{\link[base]{data.frame}}) is returned.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. The higher numbers, the more details.} } \value{ An NxK \code{\link[base]{data.frame}} or a \code{\link[base]{vector}} of length N. } \author{Henrik Bengtsson} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Find any CDF file cdfFile <- findCdf() units <- 101:120 fields <- c("unit", "unitName", "group", "groupName", "cell") df <- readCdfDataFrame(cdfFile, units=units, fields=fields) stopifnot(identical(sort(unique(df$unit)), units)) fields <- c("unit", "unitName", "unitType") fields <- c(fields, "group", "groupName") fields <- c(fields, "x", "y", "cell", "pbase", "tbase") df <- readCdfDataFrame(cdfFile, units=units, fields=fields) stopifnot(identical(sort(unique(df$unit)), units)) ############################################################## } # STOP # ############################################################## } \seealso{ For retrieving the CDF as a \code{\link[base]{list}} structure, see \code{\link[affxparser]{readCdfUnits}}. 
} \references{ [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, June 14, 2005. \url{http://www.affymetrix.com/support/developer/} } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/readCdfGroupNames.Rd0000644000175200017520000000320214516003651020326 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdfGroupNames.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdfGroupNames} \alias{readCdfGroupNames} \title{Reads group names for a set of units (probesets) in an Affymetrix CDF file} \usage{ readCdfGroupNames(filename, units=NULL, truncateGroupNames=TRUE, verbose=0) } \description{ Reads group names for a set of units (probesets) in an Affymetrix CDF file. This is for instance useful for SNP arrays where the nucleotides used for the A and B alleles are the same as the group names. } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read. If \code{\link[base]{NULL}}, all units are read.} \item{truncateGroupNames}{A \code{\link[base]{logical}} variable indicating whether unit names should be stripped from the beginning of group names.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. The higher numbers, the more details.} } \value{ A named \code{\link[base]{list}} structure where the names of the elements are the names of the units read. Each element is a \code{\link[base]{character}} \code{\link[base]{vector}} with group names for the corresponding unit. } \author{Henrik Bengtsson} \seealso{ \code{\link{readCdfUnits}}(). 
} \keyword{file} \keyword{IO} affxparser/man/readCdfHeader.Rd0000644000175200017520000000204414516003651017441 0ustar00biocbuildbiocbuild\name{readCdfHeader} \alias{readCdfHeader} \title{Reads the header associated with an Affymetrix CDF file} \description{ Reads the header of an Affymetrix CDF file using the Fusion SDK. } \usage{ readCdfHeader(filename) } \arguments{ \item{filename}{name of the CDF file.} } \value{ A named list with the following components: \item{rows}{the number of rows on the chip.} \item{cols}{the number of columns on the chip.} \item{probesets}{the number of probesets on the chip.} \item{qcprobesets}{the number of QC probesets on the chip.} \item{reference}{the reference sequence (this component only exists for resequencing chips).} \item{chiptype}{the type of the chip.} \item{filename}{the name of the cdf file.} } \examples{ for (zzz in 0) { # Find any CDF file cdfFile <- findCdf() if (is.null(cdfFile)) break header <- readCdfHeader(cdfFile) print(header) } # for (zzz in 0) } \author{ James Bullard and Kasper Daniel Hansen } \seealso{ \code{\link{readCdfUnits}()}. } \keyword{file} \keyword{IO} affxparser/man/readCdfIsPm.Rd0000644000175200017520000000227014516003651017122 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdfIsPm.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdfIsPm} \alias{readCdfIsPm} \title{Checks if cells in a CDF file are perfect-match probes or not} \usage{ readCdfIsPm(filename, units=NULL, verbose=0) } \description{ Checks if cells in a CDF file are perfect-match probes or not. } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read. 
If \code{\link[base]{NULL}}, all units are read.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. The higher numbers, the more details.} } \value{ A named \code{\link[base]{list}} of named \code{\link[base]{logical}} vectors. The name of the list elements are unit names and the names of the logical vector are group names. } \author{Henrik Bengtsson} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/readCdfNbrOfCellsPerUnitGroup.Rd0000644000175200017520000000674414516003651022601 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdfNbrOfCellsPerUnitGroup.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdfNbrOfCellsPerUnitGroup} \alias{readCdfNbrOfCellsPerUnitGroup} \title{Gets the number of cells (probes) that each group of each unit in a CDF file} \usage{ readCdfNbrOfCellsPerUnitGroup(filename, units=NULL, verbose=0) } \description{ Gets the number of cells (probes) that each group of each unit in a CDF file. } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read. If \code{\link[base]{NULL}}, all units are read.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. The higher numbers, the more details.} } \value{ A named \code{\link[base]{list}} of named \code{\link[base]{integer}} vectors. The name of the list elements are unit names and the names of the integer vector are group names. 
} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## cdfFile <- findCdf("Mapping10K_Xba131") groups <- readCdfNbrOfCellsPerUnitGroup(cdfFile) # Number of units read print(length(groups)) ## 11564 # Details on two units print(groups[56:57]) ## $`SNP_A-1516438` ## SNP_A-1516438C SNP_A-1516438T SNP_A-1516438C SNP_A-1516438T ## 10 10 10 10 ## ## $`SNP_A-1508602` ## SNP_A-1508602A SNP_A-1508602G SNP_A-1508602A SNP_A-1508602G ## 10 10 10 10 # Number of groups with different number of cells print(table(unlist(groups))) ## 10 60 ## 46240 4 # Number of cells per unit nbrOfCellsPerUnit <- unlist(lapply(groups, FUN=sum)) print(table(nbrOfCellsPerUnit)) nbrOfCellsPerUnit ## 40 60 ## 11560 4 # Number of groups per unit nbrOfGroupsPerUnit <- unlist(lapply(groups, FUN=length)) # Details on a few units print(nbrOfGroupsPerUnit[20:30]) ## SNP_A-1512666 SNP_A-1512740 SNP_A-1512132 SNP_A-1516082 SNP_A-1511962 ## 4 4 4 4 4 ## SNP_A-1515637 SNP_A-1515878 SNP_A-1518789 SNP_A-1518296 SNP_A-1519701 ## 4 4 4 4 4 ## SNP_A-1511743 ## 4 # Number of units for each unique number of groups print(table(nbrOfGroupsPerUnit)) ## nbrOfGroupsPerUnit ## 1 4 ## 4 11560 x <- list() for (size in unique(nbrOfGroupsPerUnit)) { subset <- groups[nbrOfGroupsPerUnit==size] t <- matrix(unlist(subset), nrow=size) colnames(t) <- names(subset) x[[as.character(size)]] <- t rm(subset, t) } # Check if there are any quartet units where the number # of cells in Group 1 & 2 or Group 3 & 4 does not have # the same number of cells. 
# Group 1 & 2 print(sum(x[["4"]][1,]-x[["4"]][2,] != 0)) # 0 # Group 3 & 4 print(sum(x[["4"]][3,]-x[["4"]][4,] != 0)) # 0 ############################################################## } # STOP # ############################################################## } \author{Henrik Bengtsson} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/readCdfQc.Rd0000644000175200017520000000122014516003651016607 0ustar00biocbuildbiocbuild\name{readCdfQc} \alias{readCdfQc} \title{Reads the QC units of a CDF file} \description{ Reads the QC units of a CDF file. } \usage{ readCdfQc(filename, units = NULL, verbose = 0) } \arguments{ \item{filename}{name of the CDF file.} \item{units}{The QC unit indices as a vector of integers. \code{NULL} indicates that all units should be read.} \item{verbose}{how verbose should the output be. 0 means no output, with higher numbers being more verbose.} } \value{ A list with one component for each QC unit. } \author{ Kasper Daniel Hansen } \seealso{ \code{\link{readCdf}()}. } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/readCdfUnitNames.Rd0000644000175200017520000000213714516003651020157 0ustar00biocbuildbiocbuild \name{readCdfUnitNames} \alias{readCdfUnitNames} \title{Reads unit (probeset) names from an Affymetrix CDF file} \usage{readCdfUnitNames(filename, units=NULL, verbose=0)} \description{ Gets the names of all or a subset of units (probesets) in an Affymetrix CDF file. This can be used to get a map between unit names and the internal unit indices used by the CDF file. } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read. If \code{\link[base]{NULL}}, all units are read.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly.
The higher numbers, the more details.} } \value{ A \code{\link[base]{character}} \code{\link[base]{vector}} of unit names. } \author{Henrik Bengtsson (\url{http://www.braju.com/R/})} \seealso{ \code{\link{readCdfUnits}}(). } \examples{\dontrun{See help(readCdfUnits) for an example}} \keyword{file} \keyword{IO} affxparser/man/readCdfUnits.Rd0000644000175200017520000001257214516003651017362 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdfUnits.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdfUnits} \alias{readCdfUnits} \title{Reads units (probesets) from an Affymetrix CDF file} \usage{ readCdfUnits(filename, units=NULL, readXY=TRUE, readBases=TRUE, readExpos=TRUE, readType=TRUE, readDirection=TRUE, stratifyBy=c("nothing", "pmmm", "pm", "mm"), readIndices=FALSE, verbose=0) } \description{ Reads units (probesets) from an Affymetrix CDF file. Gets all or a subset of units (probesets). } \arguments{ \item{filename}{The filename of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read. 
If \code{\link[base]{NULL}}, all units are read.} \item{readXY}{If \code{\link[base:logical]{TRUE}}, cell row and column (x,y) coordinates are retrieved, otherwise not.} \item{readBases}{If \code{\link[base:logical]{TRUE}}, cell P and T bases are retrieved, otherwise not.} \item{readExpos}{If \code{\link[base:logical]{TRUE}}, cell "expos" values are retrieved, otherwise not.} \item{readType}{If \code{\link[base:logical]{TRUE}}, unit types are retrieved, otherwise not.} \item{readDirection}{If \code{\link[base:logical]{TRUE}}, unit \emph{and} group directions are retrieved, otherwise not.} \item{stratifyBy}{A \code{\link[base]{character}} string specifying which and how elements in group fields are returned. If \code{"nothing"}, elements are returned as is, i.e. as \code{\link[base]{vector}}s. If \code{"pm"}/\code{"mm"}, only elements corresponding to perfect-match (PM) / mismatch (MM) probes are returned (as \code{\link[base]{vector}}s). If \code{"pmmm"}, elements are returned as a matrix where the first row holds elements corresponding to PM probes and the second corresponding to MM probes. Note that in this case, it is assumed that there are equal number of PMs and MMs; if not, an error is generated. Moreover, the PMs and MMs may not even be paired, i.e. there is no guarantee that the two elements in a column corresponds to a PM-MM pair.} \item{readIndices}{If \code{\link[base:logical]{TRUE}}, cell indices \emph{calculated} from the row and column (x,y) coordinates are retrieved, otherwise not. Note that these indices are \emph{one-based}.} \item{verbose}{An \code{\link[base]{integer}} specifying the verbose level. If 0, the file is parsed quietly. The higher numbers, the more details.} } \value{ A named \code{\link[base]{list}} where the names corresponds to the names of the units read. 
Each element of the list is in turn a \code{\link[base]{list}} structure with three components: \item{groups}{A \code{\link[base]{list}} with one component for each group (also called block). The information on each group is a \code{\link[base]{list}} of up to seven components: \code{x}, \code{y}, \code{pbase}, \code{tbase}, \code{expos}, \code{indices}, and \code{direction}. All fields but the latter have the same number of values as there are cells in the group. The latter field has only one value indicating the direction for the whole group. } \item{type}{An \code{\link[base]{integer}} specifying the type of the unit, where 1 is "expression", 2 is "genotyping", 3 is "CustomSeq", and 4 "tag".} \item{direction}{An \code{\link[base]{integer}} specifying the direction of the unit, which defines if the probes are interrogating the sense or the anti-sense target, where 0 is "no direction", 1 is "sense", and 2 is "anti-sense".} } \section{Cell indices are one-based}{ Note that in \pkg{affxparser} all \emph{cell indices} are by convention \emph{one-based}, which is more convenient to work with in \R. For more details on one-based indices, see \code{\link{2. Cell coordinates and cell indices}}. } \author{ James Bullard and Kasper Daniel Hansen. Modified by Henrik Bengtsson to read any subset of units and/or subset of parameters, to stratify by PM/MM, and to return cell indices. 
} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Find any CDF file cdfFile <- findCdf() # Read all units in a CDF file [~20s => 0.34ms/unit] cdf0 <- readCdfUnits(cdfFile, readXY=FALSE, readExpos=FALSE) # Read a subset of units in a CDF file [~6ms => 0.06ms/unit] units1 <- c(5, 100:109, 34) cdf1 <- readCdfUnits(cdfFile, units=units1, readXY=FALSE, readExpos=FALSE) stopifnot(identical(cdf1, cdf0[units1])) rm(cdf0) # Create a unit name to index map names <- readCdfUnitNames(cdfFile) units2 <- match(names(cdf1), names) stopifnot(all.equal(units1, units2)) cdf2 <- readCdfUnits(cdfFile, units=units2, readXY=FALSE, readExpos=FALSE) stopifnot(identical(cdf1, cdf2)) ############################################################## } # STOP # ############################################################## } \seealso{ \code{\link{readCdfCellIndices}}(). } \references{ [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, June 14, 2005. \url{http://www.affymetrix.com/support/developer/} } \keyword{file} \keyword{IO} affxparser/man/readCdfUnitsWriteMap.Rd0000644000175200017520000001515114516003651021027 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCdfUnitsWriteMap.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCdfUnitsWriteMap} \alias{readCdfUnitsWriteMap} \title{Generates an Affymetrix cell-index write map from a CDF file} \usage{ readCdfUnitsWriteMap(filename, units=NULL, ..., verbose=FALSE) } \description{ Generates an Affymetrix cell-index write map from a CDF file. 
The purpose of this method is to provide a re-ordering of cell elements such that cells in units (probesets) can be stored in contiguous blocks. When reading cell elements unit by unit, minimal file re-positioning is required, resulting in faster reading. Note: At the moment, this package does not provide methods to write/reorder CEL files. In the meantime, you have to write and re-read using your own file format. That's not too hard using \code{writeBin()} and \code{\link[base]{readBin}}(). } \arguments{ \item{filename}{The pathname of the CDF file.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be listed first. All other units are added in order at the end. If \code{\link[base]{NULL}}, units are in order.} \item{...}{Additional arguments passed to \code{\link{readCdfUnits}}().} \item{verbose}{Either a \code{\link[base]{logical}}, a \code{\link[base]{numeric}}, or a \code{\link[R.utils]{Verbose}} object specifying how much verbose/debug information is written to standard output. If a Verbose object, the level of detail is specified by the threshold level of the object. If a numeric, the value is used to set the threshold of a new Verbose object. If \code{\link[base:logical]{TRUE}}, the threshold is set to -1 (minimal). If \code{\link[base:logical]{FALSE}}, no output is written (and neither is the \pkg{R.utils} package required).} } \value{ An \code{\link[base]{integer}} \code{\link[base]{vector}} which is a \emph{write} map. } \author{Henrik Bengtsson} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Find any CDF file cdfFile <- findCdf() # Create a cell-index map (for writing) writeMap <- readCdfUnitsWriteMap(cdfFile) # Inverse map to be used to read cell elements such that, when # read unit by unit, they are read much faster. 
readMap <- invertMap(writeMap) # Validate the two maps stopifnot(identical(readMap[writeMap], 1:length(readMap))) cat("Summary of the \"randomness\" of the cell indices:\n") moves <- diff(readMap) - 1 cat(sprintf("Number of unnecessary file re-positioning: \%d (\%.1f\%\%)\n", sum(moves != 0), 100*sum(moves != 0)/length(moves))) cat(sprintf("Extra positioning: \%.1fGb\n", sum(abs(moves))/1024^3)) smallMoves <- moves[abs(moves) <= 25]; largeMoves <- moves[abs(moves) > 25]; layout(matrix(1:2)) main <- "Non-signed file moves required in unorded file" hist(smallMoves, nclass=51, main=main, xlab="moves <=25 bytes") hist(largeMoves, nclass=101, main="", xlab="moves >25 bytes") # Clean up layout(1) rm(cdfFile, readMap, writeMap, moves, smallMoves, largeMoves, main) ############################################################## } # STOP # ############################################################## ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Function to read Affymetrix probeset annotations # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - readAffymetrixProbesetAnnotation <- function(pathname, ...) 
{ # Get headers header <- scan(pathname, what="character", sep=",", quote="\"", quiet=TRUE, nlines=1); # Read only a subset of columns (unique to this example) cols <- c("Probe Set ID"="probeSet", "Chromosome"="chromosome", "Physical Position"="physicalPosition", "dbSNP RS ID"="dbSnpId"); colClasses <- rep("NULL", length(header)); colClasses[header \%in\% names(cols)] <- "character"; # Read the data (this is what takes time) df <- read.table(pathname, colClasses=colClasses, header=TRUE, sep=",", quote="\"", na.strings="---", strip.white=TRUE, check.names=FALSE, blank.lines.skip=FALSE, fill=FALSE, comment.char="", ...); # Re-order columns df <- df[,match(names(cols),colnames(df))]; colnames(df) <- cols; # Use "Probe Set ID" as rownames. Note that if we use 'row.names=1' # or similar something goes wrong. /HB 2006-03-06 rownames(df) <- df[[1]]; df <- df[,-1]; # Change types of columns df[[1]] <- factor(df[[1]], levels=c(1:22,"X","Y",NA), ordered=TRUE); df[[2]] <- as.integer(df[[2]]); df; } # readAffymetrixProbesetAnnotation() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Main # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (zz in 1) { # Chip to be remapped chipType <- "Mapping50K_Xba240" annoFile <- paste(chipType, "_annot.csv", sep="") cdfFile <- findCdf(chipType) if (is.null(cdfFile) || !file.exists(annoFile)) break; # Read SNP location details snpInfo <- readAffymetrixProbesetAnnotation(annoFile) # Order by chromsome and then physical position o <- order(snpInfo[[1]], snpInfo[[2]]) snpInfo <- snpInfo[o,] rm(o) # Read unit names in CDF file unitNames <- readCdfUnitNames(cdfFile) # The CDF unit indices sorted by chromsomal position units <- match(rownames(snpInfo), unitNames) # ...and cell indices in the same order writeMap <- readCdfUnitsWriteMap(cdfFile, units=units) # Inverse map to be used to write cell elements such that, if they # later are read unit by unit, they are read in contiguous blocks. 
readMap <- invertMap(writeMap) # Clean up rm(chipType, annoFile, cdfFile, snpInfo, unitNames, units, readMap, writeMap) } # for (zz in 1) ############################################################## } # STOP # ############################################################## } \seealso{ To invert maps, see \code{\link{invertMap}}(). \code{\link{readCel}}() and \code{\link{readCelUnits}}(). } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/readCel.Rd0000644000175200017520000001316614516003651016346 0ustar00biocbuildbiocbuild\name{readCel} \alias{readCel} \title{Reads an Affymetrix CEL file} \description{ This function reads all or a subset of the data in an Affymetrix CEL file. } \usage{ readCel(filename, indices = NULL, readHeader = TRUE, readXY = FALSE, readIntensities = TRUE, readStdvs = FALSE, readPixels = FALSE, readOutliers = TRUE, readMasked = TRUE, readMap = NULL, verbose = 0, .checkArgs = TRUE) } \arguments{ \item{filename}{the name of the CEL file.} \item{indices}{a vector of indices indicating which features to read. If the argument is \code{NULL} all features will be returned.} \item{readXY}{a logical: will the (x,y) coordinates be returned.} \item{readIntensities}{a logical: will the intensities be returned.} \item{readStdvs}{a logical: will the standard deviations be returned.} \item{readPixels}{a logical: will the number of pixels be returned.} \item{readOutliers}{a logical: will the outliers be return.} \item{readMasked}{a logical: will the masked features be returned.} \item{readHeader}{a logical: will the header of the file be returned.} \item{readMap}{A \code{\link[base]{vector}} remapping cell indices to file indices. If \code{\link[base]{NULL}}, no mapping is used.} \item{verbose}{how verbose do we want to be. 0 is no verbosity, higher numbers mean more verbose output. At the moment the values 0, 1 and 2 are supported.} \item{.checkArgs}{If \code{TRUE}, the arguments will be validated, otherwise not. 
\emph{Warning: This should only be used if the arguments have been validated elsewhere!}} } \value{ A CEL file consists of a \emph{header}, a set of \emph{cell values}, and information about \emph{outliers} and \emph{masked} cells. The cell values, which are values extracted for each cell (aka feature or probe), are the (x,y) coordinate, intensity and standard deviation estimates, and the number of pixels in the cell. If \code{indices=NULL}, cell values for all cells are returned; otherwise, only cell values specified by argument \code{indices} are returned. The function returns a named list with components described below: \item{\code{header}}{The header of the CEL file. Equivalent to the output from \code{readCelHeader}, see the documentation for that function.} \item{x,y}{(cell values) Two \code{integer} vectors containing the x and y coordinates associated with each feature.} \item{\code{intensities}}{(cell value) A \code{numeric} vector containing the intensity associated with each feature.} \item{stdvs}{(cell value) A \code{numeric} vector containing the standard deviation associated with each feature.} \item{pixels}{(cell value) An \code{integer} vector containing the number of pixels associated with each feature.} \item{outliers}{An \code{integer} vector of indices specifying which of the queried cells are flagged as outliers. Note that there is a difference between \code{outliers=NULL} and \code{outliers=integer(0)}; the latter case happens when \code{readOutliers=TRUE} but there are no outliers.} \item{masked}{An \code{integer} vector of indices specifying which of the queried cells are flagged as masked. Note that there is a difference between \code{masked=NULL} and \code{masked=integer(0)}; the latter case happens when \code{readMasked=TRUE} but there are no masked features.} The elements of the cell values are ordered according to argument \code{indices}. The lengths of the cell-value elements equal the number of cells read. 
Which of the above elements are returned is controlled by the \code{readNnn} arguments. If \code{FALSE}, the corresponding element above is \code{NULL}, e.g. if \code{readStdvs=FALSE} then \code{stdvs} is \code{NULL}. } \section{Outliers and masked cells}{ The Affymetrix image analysis software flags cells as outliers and masked. This method does not return these flags, but instead vectors of cell indices listing which cells \emph{of the queried cells} are outliers and masked, respectively. The current community view seems to be that this should be done based on statistical modeling of the actual probe intensities and should be based on the choice of preprocessing algorithm. Most algorithms only use the intensities from the CEL file. } \section{Memory usage}{ The Fusion SDK allocates memory for the entire CEL file when the file is accessed (but does not actually read the file into memory). Using the \code{indices} argument will therefore only affect the memory use of the final object (as well as speed), not the memory allocated in the C function used to parse the file. This should be a minor problem, however. } \section{Troubleshooting}{ It is considered a bug if the file contains information not accessible by this function; please report it. } \examples{ for (zzz in 0) { # Only so that 'break' can be used # Scan current directory for CEL files celFiles <- list.files(pattern="[.](c|C)(e|E)(l|L)$") if (length(celFiles) == 0) break; celFile <- celFiles[1] # Read a subset of cells idxs <- c(1:5, 1250:1500, 450:440) cel <- readCel(celFile, indices=idxs, readOutliers=TRUE) str(cel) # Clean up rm(celFiles, celFile, cel) } # for (zzz in 0) } \author{James Bullard and Kasper Daniel Hansen} \seealso{ \code{\link{readCelHeader}()} for a description of the header output. Often a user only wants to read the intensities; see \code{\link{readCelIntensities}()} for a function specialized for that use. 
} \keyword{file} \keyword{IO} affxparser/man/readCelHeader.Rd0000644000175200017520000000535514516003651017460 0ustar00biocbuildbiocbuild\name{readCelHeader} \alias{readCelHeader} \title{Parsing the header of an Affymetrix CEL file} \description{ Reads in the header of an Affymetrix CEL file using the Fusion SDK. } \usage{ readCelHeader(filename) } \arguments{ \item{filename}{the name of the CEL file.} } \details{ This function returns the header of a CEL file. Affymetrix operates with different versions of this file format. Depending on what version is being read, different information is accessible. } \value{ A named list with components described below. The entries are obtained from the Fusion SDK interface functions. We try to obtain all relevant information from the file. \item{filename}{the name of the cel file.} \item{version}{the version of the cel file.} \item{cols}{the number of columns on the chip.} \item{rows}{the number of rows on the chip.} \item{total}{the total number of features on the chip. Usually equal to \code{rows} times \code{cols}, but since it is a separate attribute in the SDK we decided to include it anyway.} \item{algorithm}{the algorithm used to create the CEL file.} \item{parameters}{the parameters used in the algorithm. Seems to be semi-colon separated.} \item{chiptype}{the type of the chip.} \item{header}{the entire header of the CEL file. Only available for non-calvin format files.} \item{datheader}{the entire dat header of the CEL file. This contains for example a date.} \item{librarypackage}{the library package name of the file. Empty for older versions.} \item{cellmargin}{a parameter used to generate the CEL file. 
According to Affymetrix, it designates the number of pixels to ignore around the feature border when calculating the intensity value (the number of pixels ignored are cellmargin divided by 2).} \item{noutliers}{the number of features reported as outliers.} \item{nmasked}{the number of features reported as masked.} } \author{ James Bullard and Kasper Daniel Hansen } \note{ Memory usage:the Fusion SDK allocates memory for the entire CEL file, when the file is accessed. The memory footprint of this function will therefore seem to be (rather) large. Speed: CEL files of version 2 (standard text files) needs to be completely read in order to report the number of outliers and masked features. } \seealso{ \code{\link{readCel}()} for reading in the entire CEL file. That function also returns the header. See \code{affxparserInfo} for general comments on the package and the Fusion SDK. } \examples{ # Scan current directory for CEL files files <- list.files(pattern="[.](c|C)(e|E)(l|L)$") if (length(files) > 0) { header <- readCelHeader(files[1]) print(header) rm(header) } # Clean up rm(files) } \keyword{file} \keyword{IO} affxparser/man/readCelIntensities.Rd0000644000175200017520000000331614516003651020561 0ustar00biocbuildbiocbuild\name{readCelIntensities} \alias{readCelIntensities} \title{ Reads the intensities contained in several Affymetrix CEL files } \description{ Reads the intensities of several Affymetrix CEL files (as opposed to \code{readCel}() which only reads a single file). } \usage{ readCelIntensities(filenames, indices = NULL, ..., verbose = 0) } \arguments{ \item{filenames}{the names of the CEL files as a character vector.} \item{indices}{a vector of which indices should be read. 
If the argument is \code{NULL} all features will be returned.} \item{...}{Additional arguments passed to \code{readCel}().} \item{verbose}{an integer: how verbose do we want to be, higher means more verbose.} } \details{ The function will initially allocate a matrix with the same memory footprint as the final object. } \value{ A matrix with a number of rows equal to the length of the \code{indices} argument (or the number of features on the entire chip), and a number of columns equal to the number of files. The columns are ordered according to the \code{filenames} argument. } \author{ James Bullard and Kasper Daniel Hansen } \note{ Currently this function builds on \code{readCel}(), and simply calls this function multiple times. If testing yields sufficient reasons for doing so, it may be re-implemented in C++. } \seealso{ \code{\link{readCel}}() for a discussion of a more versatile function, particular with details of the \code{indices} argument. } \examples{ # Scan current directory for CEL files files <- list.files(pattern="[.](c|C)(e|E)(l|L)$") if (length(files) >= 2) { cel <- readCelIntensities(files[1:2]) str(cel) rm(cel) } # Clean up rm(files) } \keyword{file} \keyword{IO} affxparser/man/readCelRectangle.Rd0000644000175200017520000000525014516003651020166 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCelRectangle.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCelRectangle} \alias{readCelRectangle} \title{Reads a spatial subset of probe-level data from Affymetrix CEL files} \usage{ readCelRectangle(filename, xrange=c(0, Inf), yrange=c(0, Inf), ..., asMatrix=TRUE) } \description{ Reads a spatial subset of probe-level data from Affymetrix CEL files. 
} \arguments{ \item{filename}{The pathname of the CEL file.} \item{xrange}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length two giving the left and right coordinates of the cells to be returned.} \item{yrange}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length two giving the top and bottom coordinates of the cells to be returned.} \item{...}{Additional arguments passed to \code{\link{readCel}}().} \item{asMatrix}{If \code{\link[base:logical]{TRUE}}, the CEL data fields are returned as matrices with element (1,1) corresponding to cell (xrange[1],yrange[1]).} } \value{ A named \code{\link[base]{list}} CEL structure similar to what \code{\link{readCel}}() returns. In addition, if \code{asMatrix} is \code{\link[base:logical]{TRUE}}, the CEL data fields are returned as matrices, otherwise not. } \author{Henrik Bengtsson} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## rotate270 <- function(x, ...) { x <- t(x) nc <- ncol(x) if (nc < 2) return(x) x[,nc:1,drop=FALSE] } # Search for some available CEL files path <- system.file("rawData", package="AffymetrixDataTestFiles") file <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE) # Read CEL intensities in the upper left corner cel <- readCelRectangle(file, xrange=c(0,250), yrange=c(0,250)) z <- rotate270(cel$intensities) sub <- paste("Chip type:", cel$header$chiptype) image(z, col=gray.colors(256), axes=FALSE, main=basename(file), sub=sub) text(x=0, y=1, labels="(0,0)", adj=c(0,-0.7), cex=0.8, xpd=TRUE) text(x=1, y=0, labels="(250,250)", adj=c(1,1.2), cex=0.8, xpd=TRUE) # Clean up rm(rotate270, files, file, cel, z, sub) ############################################################## } # STOP # ############################################################## } \seealso{ The \code{\link{readCel}}() method is used internally. 
} \keyword{file} \keyword{IO} affxparser/man/readCelUnits.Rd0000644000175200017520000001131414516003651017362 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % readCelUnits.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{readCelUnits} \alias{readCelUnits} \title{Reads probe-level data ordered as units (probesets) from one or several Affymetrix CEL files} \usage{ readCelUnits(filenames, units=NULL, stratifyBy=c("nothing", "pmmm", "pm", "mm"), cdf=NULL, ..., addDimnames=FALSE, dropArrayDim=TRUE, transforms=NULL, readMap=NULL, verbose=FALSE) } \description{ Reads probe-level data ordered as units (probesets) from one or several Affymetrix CEL files by using the unit and group definitions in the corresponding Affymetrix CDF file. } \arguments{ \item{filenames}{The filenames of the CEL files.} \item{units}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of unit indices specifying which units to be read. If \code{\link[base]{NULL}}, all units are read.} \item{stratifyBy}{Argument passed to low-level method \code{\link[affxparser]{readCdfCellIndices}}.} \item{cdf}{A \code{\link[base]{character}} filename of a CDF file, or a CDF \code{\link[base]{list}} structure. If \code{\link[base]{NULL}}, the CDF file is searched for by \code{\link{findCdf}}() first starting from the current directory and then from the directory where the first CEL file is.} \item{...}{Arguments passed to low-level method \code{\link[affxparser]{readCel}}, e.g. \code{readXY} and \code{readStdvs}.} \item{addDimnames}{If \code{\link[base:logical]{TRUE}}, dimension names are added to arrays, otherwise not. 
The size of the returned CEL structure in bytes increases by 30-40\% with dimension names.} \item{dropArrayDim}{If \code{\link[base:logical]{TRUE}} and only one array is read, the elements of the group field do \emph{not} have an array dimension.} \item{transforms}{A \code{\link[base]{list}} of exactly \code{length(filenames)} \code{\link[base]{function}}s. If \code{\link[base]{NULL}}, no transformation is performed. Intensities read are passed through the corresponding transform function before being returned.} \item{readMap}{A \code{\link[base]{vector}} remapping cell indices to file indices. If \code{\link[base]{NULL}}, no mapping is used.} \item{verbose}{Either a \code{\link[base]{logical}}, a \code{\link[base]{numeric}}, or a \code{\link[R.utils]{Verbose}} object specifying how much verbose/debug information is written to standard output. If a Verbose object, the level of detail is specified by the threshold level of the object. If a numeric, the value is used to set the threshold of a new Verbose object. If \code{\link[base:logical]{TRUE}}, the threshold is set to -1 (minimal). If \code{\link[base:logical]{FALSE}}, no output is written (and neither is the \pkg{R.utils} package required). } } \value{ A named \code{\link[base]{list}} with one element for each unit read. The names correspond to the names of the units read. Each unit element is in turn a \code{\link[base]{list}} structure with groups (aka blocks). Each group contains requested fields, e.g. \code{intensities}, \code{stdvs}, and \code{pixels}. If more than one CEL file is read, an extra dimension is added to each of the corresponding fields, which can be used to subset by CEL file. Note that neither CEL headers nor information about outliers and masked cells are returned. To access these, use \code{\link{readCelHeader}}() and \code{\link{readCel}}(). 
} \author{Henrik Bengtsson} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Search for some available CEL files path <- system.file("rawData", package="AffymetrixDataTestFiles") files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE, firstOnly=FALSE) files <- grep("FusionSDK_Test3", files, value=TRUE) files <- grep("Calvin", files, value=TRUE) # Fake more CEL files if not enough files <- rep(files, length.out=5) print(files); rm(files); ############################################################## } # STOP # ############################################################## } \seealso{ Internally, \code{\link{readCelHeader}}(), \code{\link{readCdfUnits}}() and \code{\link{readCel}}() are used. } \references{ [1] Affymetrix Inc, Affymetrix GCOS 1.x compatible file formats, June 14, 2005. \url{http://www.affymetrix.com/support/developer/} } \keyword{file} \keyword{IO} affxparser/man/readChp.Rd0000644000175200017520000000252614516003651016353 0ustar00biocbuildbiocbuild\name{readChp} \alias{readChp} \title{ A function to read Affymetrix CHP files } \description{ This function will parse any type of CHP file and return the results in a list. The contents of the list will depend on the type of CHP file that is parsed and readers are referred to Affymetrix documentation of what should be there, and how to interpret it. } \usage{ readChp(filename, withQuant = TRUE) } \arguments{ \item{filename}{ The name of the CHP file to read. } \item{withQuant}{ A boolean value, currently largely unused. } } \details{ This is an interface to the Affymetrix Fusion SDK. The Affymetrix documentation should be consulted for explicit details. } \value{ A list is returned. The contents of the list depend on the type of CHP file that was read. Users may want to translate the different outputs into specific containers. 
} \section{Troubleshooting}{ It is considered a bug if the file contains information not accessible by this function, please report it. } \author{R. Gentleman} \seealso{ \code{\link{readCel}}} \examples{ if (require("AffymetrixDataTestFiles")) { path <- system.file("rawData", package="AffymetrixDataTestFiles") files <- findFiles(pattern="[.](chp|CHP)$", path=path, recursive=TRUE, firstOnly=FALSE) s1 = readChp(files[1]) length(s1) names(s1) names(s1[[7]]) } } \keyword{file} \keyword{IO} affxparser/man/readClf.Rd0000644000175200017520000000230514516003651016340 0ustar00biocbuildbiocbuild\name{readClf} \alias{readClf} \title{Parsing a CLF file using Affymetrix Fusion SDK} \description{ This function parses a CLF file using the Affymetrix Fusion SDK. CLF (chip layout) files contain information associating probe ids with chip x- and y- coordinates. } \usage{ readClf(file) } \arguments{ \item{file}{\code{character(1)} providing a path to the CLF file to be input.} } \value{ An list. The \code{header} element is always present. \item{header}{A list with information about the CLF file. The list contains elements described in the CLF file format document referenced below.} \item{dims}{A length-two integer vector of chip x- and y-coordinates.} \item{id}{An integer vector of length \code{prod(dims)} containing probe identifiers.} \item{x}{An integer vector of length \code{prod(dims)} containing x-coordinates corresponding to the entries in \code{id}.} \item{y}{An integer vector of length \code{prod(dims)} containing y-coordinates corresponding to the entries in \code{id}.} } \author{Martin Morgan} \seealso{ \url{https://www.affymetrix.com/support/developer/fusion/File_Format_CLF_aptv161.pdf} describes CLF file content. 
} \keyword{file} \keyword{IO} affxparser/man/readClfEnv.Rd0000644000175200017520000000274414516003651017020 0ustar00biocbuildbiocbuild\name{readClfEnv} \alias{readClfEnv} \title{Parsing a CLF file using Affymetrix Fusion SDK} \description{ This function parses a CLF file using the Affymetrix Fusion SDK. CLF (chip layout) files contain information associating probe ids with chip x- and y- coordinates. } \usage{ readClfEnv(file, readBody = TRUE) } \arguments{ \item{file}{\code{character(1)} providing a path to the CLF file to be input.} \item{readBody}{\code{logical(1)} indicating whether the entire file should be parsed (\code{TRUE}) or only the file header information describing the chips to which the file is relevant.} } \value{ An environment. The \code{header} element is always present; the remainder are present when \code{readBody=TRUE}. \item{header}{A list with information about the CLF file. The list contains elements described in the CLF file format document referenced below.} \item{dims}{A length-two integer vector of chip x- and y-coordinates.} \item{id}{An integer vector of length \code{prod(dims)} containing probe identifiers.} \item{x}{An integer vector of length \code{prod(dims)} containing x-coordinates corresponding to the entries in \code{id}.} \item{y}{An integer vector of length \code{prod(dims)} containing y-coordinates corresponding to the entries in \code{id}.} } \author{Martin Morgan} \seealso{ \url{https://www.affymetrix.com/support/developer/fusion/File_Format_CLF_aptv161.pdf} describes CLF file content. } \keyword{file} \keyword{IO} affxparser/man/readClfHeader.Rd0000644000175200017520000000103714516003651017452 0ustar00biocbuildbiocbuild\name{readClfHeader} \alias{readClfHeader} %- Also NEED an '\alias' for EACH other topic documented here. \title{Read the header of a CLF file.} \description{ Reads the header of a CLF file. 
The exact information stored in this file can be viewed in the \code{\link{readClfEnv}()} documentation which reads the header in addition to the body. } \usage{ readClfHeader(file) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{file}{\code{file} a CLF file} } \value{ A list of header elements. } \keyword{IO} affxparser/man/readPgf.Rd0000644000175200017520000000575614516003651016365 0ustar00biocbuildbiocbuild\name{readPgf} \alias{readPgf} \title{Parsing a PGF file using Affymetrix Fusion SDK} \description{ This function parses a PGF file using the Affymetrix Fusion SDK. PGF (probe group) files describe probes present within probe sets, including the type (e.g., pm, mm) of the probe and probeset. } \usage{ readPgf(file, indices = NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{file}{\code{character(1)} providing a path to the PGF file to be input.} \item{indices}{\code{integer(n)} a vector of indices of the probesets to be read.} } \value{ An list. The \code{header} element is always present; the remainder are present when \code{readBody=TRUE}. The elements present when \code{readBody=TRUE} describe probe sets, atoms, and probes. Elements within probe sets, for instance, are coordinated such that the \code{i}th index of one vector (e.g., \code{probesetId}) corresponds to the \code{i}th index of a second vector (e.g., \code{probesetType}). The atoms contained within probeset \code{i} are in positions \code{probesetStartAtom[i]:(probesetStartAtom[i+1]-1)} of the atom vectors. A similar map applies to probes within atoms, using \code{atomStartProbe} as the index. The PGF file format includes optional elements; these elements are always present in the list, but with appropriate default values. \item{header}{A list with information about the PGF file. 
The list contains elements described in the PGF file format document referenced below.} \item{probesetId}{integer vector of probeset identifiers.} \item{probesetType}{character vector of probeset types. Types are described in the PGF file format document.} \item{probesetName}{character vector of probeset names.} \item{probesetStartAtom}{integer vector of the start index (e.g., in the element \code{atomId} of atoms belonging to this probeset).} \item{atomId}{integer vector of atom identifiers.} \item{atomExonPosition}{integer vector of probe interrogation position relative to the target sequence.} \item{atomStartProbe}{integer vector of the start index (e.g., in the element \code{probeId} of probes belonging to this atom).} \item{probeId}{integer vector of probe identifiers.} \item{probeType}{character vector of probe types. Types are described in the PGF file format document.} \item{probeGcCount}{integer vector of probe GC content.} \item{probeLength}{integer vector of probe lengths.} \item{probeInterrogationPosition}{integer vector of the position, within the probe, at which interrogation occurs.} \item{probeSequence}{character vector of the probe sequence.} } \author{Martin Morgan} \seealso{ \url{https://www.affymetrix.com/support/developer/fusion/File_Format_PGF_aptv161.pdf} describes PGF file content. The internal function \code{.pgfProbeIndexFromProbesetIndex} provides a map between the indices of probe set entries and the indices of the probes contained in the probe set. } \keyword{file} \keyword{IO} affxparser/man/readPgfEnv.Rd0000644000175200017520000000646114516003651017030 0ustar00biocbuildbiocbuild\name{readPgfEnv} \alias{readPgfEnv} \title{Parsing a PGF file using Affymetrix Fusion SDK} \description{ This function parses a PGF file using the Affymetrix Fusion SDK. PGF (probe group) files describe probes present within probe sets, including the type (e.g., pm, mm) of the probe and probeset. 
} \usage{ readPgfEnv(file, readBody = TRUE, indices = NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{file}{\code{character(1)} providing a path to the PGF file to be input.} \item{readBody}{\code{logical(1)} indicating whether the entire file should be parsed (\code{TRUE}) or only the file header information describing the chips to which the file is relevant.} \item{indices}{\code{integer(n)} vector of positive integers indicating which probesets to read. These integers must be sorted (increasing) and unique.} } \value{ An environment. The \code{header} element is always present; the remainder are present when \code{readBody=TRUE}. The elements present when \code{readBody=TRUE} describe probe sets, atoms, and probes. Elements within probe sets, for instance, are coordinated such that the \code{i}th index of one vector (e.g., \code{probesetId}) corresponds to the \code{i}th index of a second vector (e.g., \code{probesetType}). The atoms contained within probeset \code{i} are in positions \code{probesetStartAtom[i]:(probesetStartAtom[i+1]-1)} of the atom vectors. A similar map applies to probes within atoms, using \code{atomStartProbe} as the index. The PGF file format includes optional elements; these elements are always present in the environment, but with appropriate default values. \item{header}{A list with information about the PGF file. The list contains elements described in the PGF file format document referenced below.} \item{probesetId}{integer vector of probeset identifiers.} \item{probesetType}{character vector of probeset types. 
Types are described in the PGF file format document.} \item{probesetName}{character vector of probeset names.} \item{probesetStartAtom}{integer vector of the start index (e.g., in the element \code{atomId} of atoms belonging to this probeset).} \item{atomId}{integer vector of atom identifiers.} \item{atomExonPosition}{integer vector of probe interrogation position relative to the target sequence.} \item{atomStartProbe}{integer vector of the start index (e.g., in the element \code{probeId} of probes belonging to this atom).} \item{probeId}{integer vector of probe identifiers.} \item{probeType}{character vector of probe types. Types are described in the PGF file format document.} \item{probeGcCount}{integer vector of probe GC content.} \item{probeLength}{integer vector of probe lengths.} \item{probeInterrogationPosition}{integer vector of the position, within the probe, at which interrogation occurs.} \item{probeSequence}{character vector of the probe sequence.} } \author{Martin Morgan} \seealso{ \url{https://www.affymetrix.com/support/developer/fusion/File_Format_PGF_aptv161.pdf} describes PGF file content. The internal function \code{.pgfProbeIndexFromProbesetIndex} provides a map between the indices of probe set entries and the indices of the probes contained in the probe set. } \keyword{file} \keyword{IO} affxparser/man/readPgfHeader.Rd0000644000175200017520000000103314516003651017456 0ustar00biocbuildbiocbuild\name{readPgfHeader} \alias{readPgfHeader} \title{ Read the header of a PGF file into a list. } \description{ This function reads the header of a PGF file into a list. More details on what the exact fields are can be found in the details section. } \usage{ readPgfHeader(file) } \arguments{ \item{file}{\code{file}:A file in PGF format} } \details{ \url{https://www.affymetrix.com/support/developer/fusion/File_Format_PGF_aptv161.pdf} } \value{ A list corresponding to the elements in the header. 
} \keyword{file} \keyword{IO} affxparser/man/updateCel.Rd0000644000175200017520000001215214516003651016707 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % updateCel.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{updateCel} \alias{updateCel} \title{Updates a CEL file} \usage{ updateCel(filename, indices=NULL, intensities=NULL, stdvs=NULL, pixels=NULL, writeMap=NULL, ..., verbose=0) } \description{ Updates a CEL file. } \arguments{ \item{filename}{The filename of the CEL file.} \item{indices}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of cell (probe) indices specifying which cells to update. If \code{\link[base]{NULL}}, all indices are considered.} \item{intensities}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of intensity values to be stored. Alternatively, it can also be a named \code{\link[base]{data.frame}} or \code{\link[base]{matrix}} (or \code{\link[base]{list}}) where the named columns (elements) are the fields to be updated.} \item{stdvs}{An optional \code{\link[base]{numeric}} \code{\link[base]{vector}}.} \item{pixels}{An optional \code{\link[base]{numeric}} \code{\link[base]{vector}}.} \item{writeMap}{An optional write map.} \item{...}{Not used.} \item{verbose}{An \code{\link[base]{integer}} specifying how much verbose details are outputted.} } \value{ Returns (invisibly) the pathname of the file updated. } \details{ Currently only binary (v4) CEL files are supported. The current version of the method does not make use of the Fusion SDK, but its own code to navigate and update the CEL file. 
} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Search for some available Calvin CEL files path <- system.file("rawData", package="AffymetrixDataTestFiles") files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE, firstOnly=FALSE) files <- grep("FusionSDK_HG-U133A", files, value=TRUE) files <- grep("Calvin", files, value=TRUE) file <- files[1] # Convert to an XDA CEL file filename <- file.path(tempdir(), basename(file)) if (file.exists(filename)) file.remove(filename) convertCel(file, filename) fields <- c("intensities", "stdvs", "pixels") # Cells to be updated idxs <- 1:2 # Get CEL header hdr <- readCelHeader(filename) # Get the original data cel <- readCel(filename, indices=idxs, readStdvs=TRUE, readPixels=TRUE) print(cel[fields]) cel0 <- cel # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Square-root the intensities # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - updateCel(filename, indices=idxs, intensities=sqrt(cel$intensities)) cel <- readCel(filename, indices=idxs, readStdvs=TRUE, readPixels=TRUE) print(cel[fields]) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update a few cell values by a data frame # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- data.frame( intensities=cel0$intensities, stdvs=c(201.1, 3086.1)+0.5, pixels=c(9,9+1) ) updateCel(filename, indices=idxs, data) # Assert correctness of update cel <- readCel(filename, indices=idxs, readStdvs=TRUE, readPixels=TRUE) print(cel[fields]) for (ff in fields) { stopifnot(all.equal(cel[[ff]], data[[ff]], .Machine$double.eps^0.25)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update a region of the CEL file # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Load 
pre-defined data side <- 306 pathname <- system.file("extras/easternEgg.gz", package="affxparser") con <- gzfile(pathname, open="rb") z <- readBin(con=con, what="integer", size=1, signed=FALSE, n=side^2) close(con) z <- matrix(z, nrow=side) side <- min(hdr$cols - 2*22, side) z <- as.double(z[1:side,1:side]) x <- matrix(22+0:(side-1), nrow=side, ncol=side, byrow=TRUE) idxs <- as.vector((1 + x) + hdr$cols*t(x)) # Load current data in the same region z0 <- readCel(filename, indices=idxs)$intensities # Mix the two data sets z <- (0.3*z^2 + 0.7*z0) # Update the CEL file updateCel(filename, indices=idxs, intensities=z) # Make some spatial changes rotate270 <- function(x, ...) { x <- t(x) nc <- ncol(x) if (nc < 2) return(x) x[,nc:1,drop=FALSE] } # Display a spatial image of the updated CEL file cel <- readCelRectangle(filename, xrange=c(0,350), yrange=c(0,350)) z <- rotate270(cel$intensities) sub <- paste("Chip type:", cel$header$chiptype) image(z, col=gray.colors(256), axes=FALSE, main=basename(filename), sub=sub) text(x=0, y=1, labels="(0,0)", adj=c(0,-0.7), cex=0.8, xpd=TRUE) text(x=1, y=0, labels="(350,350)", adj=c(1,1.2), cex=0.8, xpd=TRUE) # Clean up file.remove(filename) rm(files, cel, cel0, idxs, data, ff, fields, rotate270) ############################################################## } # STOP # ############################################################## } \author{Henrik Bengtsson} \keyword{file} \keyword{IO} affxparser/man/updateCelUnits.Rd0000644000175200017520000001051714516003651017735 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % updateCelUnits.R % % by the Rdoc compiler part of the R.oo package. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{updateCelUnits} \alias{updateCelUnits} \title{Updates a CEL file unit by unit} \usage{ updateCelUnits(filename, cdf=NULL, data, ..., verbose=0) } \description{ Updates a CEL file unit by unit.\cr \emph{Please note that, contrary to \code{\link{readCelUnits}}(), this method can only update a single CEL file at a time.} } \arguments{ \item{filename}{The filename of the CEL file.} \item{cdf}{A (optional) CDF \code{\link[base]{list}} structure either with field \code{indices} or fields \code{x} and \code{y}. If \code{\link[base]{NULL}}, the unit names (and from there the cell indices) are inferred from the names of the elements in \code{data}. } \item{data}{A \code{\link[base]{list}} structure in a format similar to what is returned by \code{\link{readCelUnits}}() for \emph{a single CEL file only}.} \item{...}{Optional arguments passed to \code{\link{readCdfCellIndices}}(), which is called if \code{cdf} is not given.} \item{verbose}{An \code{\link[base]{integer}} specifying how much verbose details are outputted.} } \value{ Returns what \code{\link{updateCel}}() returns. } \section{Working with re-arranged CDF structures}{ Note that if the \code{cdf} structure is specified the CDF file is \emph{not} queried, but all information about cell x and y locations, that is, cell indices is expected to be in this structure. This can be very useful when one works with a cdf structure that originates from the underlying CDF file, but has been restructured for instance through the \code{\link{applyCdfGroups}}() method, and \code{data} correspondingly. This update method knows how to update such structures too. 
} \examples{ ############################################################## if (require("AffymetrixDataTestFiles")) { # START # ############################################################## # Search for some available Calvin CEL files path <- system.file("rawData", package="AffymetrixDataTestFiles") files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE, firstOnly=FALSE) files <- grep("FusionSDK_Test3", files, value=TRUE) files <- grep("Calvin", files, value=TRUE) file <- files[1] # Convert to an XDA CEL file pathname <- file.path(tempdir(), basename(file)) if (file.exists(pathname)) file.remove(pathname) convertCel(file, pathname) # Check for the CDF file hdr <- readCelHeader(pathname) cdfFile <- findCdf(hdr$chiptype) hdr <- readCdfHeader(cdfFile) nbrOfUnits <- hdr$nunits print(nbrOfUnits); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Example: Read and re-write the same data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - units <- c(101, 51) data1 <- readCelUnits(pathname, units=units, readStdvs=TRUE) cat("Original data:\n") str(data1) updateCelUnits(pathname, data=data1) data2 <- readCelUnits(pathname, units=units, readStdvs=TRUE) cat("Updated data:\n") str(data2) stopifnot(identical(data1, data2)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Example: Random read and re-write "stress test" # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (kk in 1:10) { nunits <- sample(min(1000,nbrOfUnits), size=1) units <- sample(nbrOfUnits, size=nunits) cat(sprintf("\%02d. 
Selected \%d random units: reading", kk, nunits)); t <- system.time({ data1 <- readCelUnits(pathname, units=units, readStdvs=TRUE) }, gcFirst=TRUE)[3] cat(sprintf(" [\%.2fs=\%.2fs/unit], updating", t, t/nunits)) t <- system.time({ updateCelUnits(pathname, data=data1) }, gcFirst=TRUE)[3] cat(sprintf(" [\%.2fs=\%.2fs/unit], validating", t, t/nunits)) data2 <- readCelUnits(pathname, units=units, readStdvs=TRUE) stopifnot(identical(data1, data2)) cat(". done\n") } ############################################################## } # STOP # ############################################################## } \author{Henrik Bengtsson} \seealso{ Internally, \code{\link{updateCel}}() is used. } \keyword{file} \keyword{IO} affxparser/man/writeCdf.Rd0000644000175200017520000000266314516003651016556 0ustar00biocbuildbiocbuild\name{writeCdf} \alias{writeCdf} \title{Creates a binary CDF file} \description{ This function creates a binary CDF file given a valid CDF structure containing all necessary elements. \emph{Warning: The API for this function is likely to be changed in future versions.} } \usage{ writeCdf(fname, cdfheader, cdf, cdfqc, overwrite=FALSE, verbose=0) } \arguments{ \item{fname}{name of the CDF file.} \item{cdfheader}{A list with a structure equal to the output of \code{readCdfHeader}.} \item{cdf}{A list with a structure equal to the output of \code{readCdf}.} \item{cdfqc}{A list with a structure equal to the output of \code{readCdfQc}.} \item{overwrite}{Overwrite existing file?} \item{verbose}{how verbose should the output be. 0 means no output, with higher numbers being more verbose.} } \details{ This function has been validated mainly by reading in various ASCII or binary CDF files which are written back as new CDF files, and compared element by element with the original files. } \value{ This function is used for its byproduct: creating a CDF file. 
} \author{ Kasper Daniel Hansen } \seealso{ To read the CDF "regular" and QC units with all necessary fields and values for writing a CDF file, see \code{\link{readCdf}}, \code{\link{readCdfQc}}() and \code{\link{readCdfHeader}}. To compare two CDF files, see \code{\link{compareCdfs}}. } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/writeCdfHeader.Rd0000644000175200017520000000303514516003651017661 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % writeCdfHeader.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{writeCdfHeader} \alias{writeCdfHeader} \title{Writes a CDF header} \usage{ writeCdfHeader(con, cdfHeader, unitNames, qcUnitLengths, unitLengths, verbose=0) } \description{ Writes a CDF header. \emph{This method is not intended to be used explicitly. To write a CDF, use \code{\link{writeCdf}}() instead.} } \arguments{ \item{con}{An open \code{\link[base:connections]{connection}} to which nothing has been written.} \item{cdfHeader}{A CDF header \code{\link[base]{list}} structure.} \item{unitNames}{A \code{\link[base]{character}} \code{\link[base]{vector}} of all unit names.} \item{qcUnitLengths}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of all the number of bytes in each of the QC units.} \item{unitLengths}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of all the number of bytes in each of the (ordinary) units.} \item{verbose}{An \code{\link[base]{integer}} specifying how much verbose details are outputted.} } \value{ Returns nothing. } \author{Henrik Bengtsson} \seealso{ This method is called by \code{\link{writeCdf}}(). See also \code{\link{writeCdfQcUnits}}() and \code{\link{writeCdfUnits}}(). 
} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/writeCdfQcUnits.Rd0000644000175200017520000000241114516003651020054 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % writeCdfQcUnits.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{writeCdfQcUnits} \alias{writeCdfQcUnits} \title{Writes CDF QC units} \usage{ writeCdfQcUnits(con, cdfQcUnits, verbose=0) } \description{ Writes CDF QC units. \emph{This method is not intended to be used explicitly. To write a CDF, use \code{\link{writeCdf}}() instead.} } \arguments{ \item{con}{An open \code{\link[base:connections]{connection}} to which a CDF header already has been written by \code{\link{writeCdfHeader}}().} \item{cdfQcUnits}{A \code{\link[base]{list}} structure of CDF QC units as returned by \code{\link{readCdf}}() (\emph{not} \code{\link{readCdfUnits}}()).} \item{verbose}{An \code{\link[base]{integer}} specifying how much verbose details are outputted.} } \value{ Returns nothing. } \author{Henrik Bengtsson} \seealso{ This method is called by \code{\link{writeCdf}}(). See also \code{\link{writeCdfHeader}}() and \code{\link{writeCdfUnits}}(). } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/writeCdfUnits.Rd0000644000175200017520000000247314516003651017600 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % writeCdfUnits.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{writeCdfUnits} \alias{writeCdfUnits} \title{Writes CDF units} \usage{ writeCdfUnits(con, cdfUnits, verbose=0) } \description{ Writes CDF units. \emph{This method is not intended to be used explicitly. 
To write a CDF, use \code{\link{writeCdf}}() instead.} } \arguments{ \item{con}{An open \code{\link[base:connections]{connection}} to which a CDF header and QC units already have been written by \code{\link{writeCdfHeader}}() and \code{\link{writeCdfQcUnits}}(), respectively.} \item{cdfUnits}{A \code{\link[base]{list}} structure of CDF units as returned by \code{\link{readCdf}}() (\emph{not} \code{\link{readCdfUnits}}()).} \item{verbose}{An \code{\link[base]{integer}} specifying how much verbose details are outputted.} } \value{ Returns nothing. } \author{Henrik Bengtsson} \seealso{ This method is called by \code{\link{writeCdf}}(). See also \code{\link{writeCdfHeader}}() and \code{\link{writeCdfQcUnits}}(). } \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/writeCelHeader.Rd0000644000175200017520000000334414516003651017673 0ustar00biocbuildbiocbuild%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % writeCelHeader.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{writeCelHeader} \alias{writeCelHeader} \title{Writes a CEL header to a connection} \usage{ writeCelHeader(con, header, outputVersion=c("4"), ...) } \description{ Writes a CEL header to a connection. } \arguments{ \item{con}{A \code{\link[base:connections]{connection}}.} \item{header}{A \code{\link[base]{list}} structure describing the CEL header, similar to the structure returned by \code{\link{readCelHeader}}().} \item{outputVersion}{A \code{\link[base]{character}} string specifying the output format. Currently only CEL version 4 (binary;XDA) is supported.} \item{...}{Not used.} } \value{ Returns (invisibly) the pathname of the file created. } \details{ Currently only CEL version 4 (binary;XDA) headers can be written. } \section{Redundant fields}{ The CEL v4 header contains redundant information. 
To avoid inconsistency this method generates such redundant values from the original values. This is consistent to how the CEL reader in Fusion SDK does it, cf. \code{\link{readCelHeader}}(). The redundant information is in the (CEL v3) \code{header} field, which contains the CEL header information as it would appear in the CEL v3 format. This in turn contains a DAT header field reproducing the DAT header from the image analysis. It is from this DAT header that the chip type is extracted. } \author{Henrik Bengtsson} \keyword{file} \keyword{IO} \keyword{internal} affxparser/man/writeTpmap.Rd0000644000175200017520000000206014516003651017132 0ustar00biocbuildbiocbuild\name{writeTpmap} \alias{writeTpmap} \alias{tpmap2bpmap} %- Also NEED an '\alias' for EACH other topic documented here. \title{Writes BPMAP and TPMAP files.} \description{ Writes BPMAP and TPMAP files. } \usage{ writeTpmap(filename, bpmaplist, verbose = 0) tpmap2bpmap(tpmapname, bpmapname, verbose = 0) } \arguments{ \item{filename}{The filename.} \item{bpmaplist}{A list structure similar to the result of \code{readBpmap}.} \item{tpmapname}{Filename of the TPMAP file.} \item{bpmapname}{Filename of the BPMAP file.} \item{verbose}{How verbose do we want to be.} } \details{ \code{writeTpmap} writes a text probe map file, while \code{tpmap2bpmap} converts such a file to a binary probe mapping file. Somehow Affymetrix has different names for the same structure, depending on whether the file is binary or text. I have seen many TPMAP files referred to as BPMAP files. } \value{ These functions are called for their side effects (creating files). 
} \author{Kasper Daniel Hansen} \seealso{\code{\link{readBpmap}}} \keyword{file} \keyword{IO} affxparser/src/0000755000175200017520000000000014516022540014523 5ustar00biocbuildbiocbuildaffxparser/src/000.init.c0000644000175200017520000000630014516003651016131 0ustar00biocbuildbiocbuild#include #include // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. */ /* .Call calls */ extern SEXP R_affx_cdf_groupNames(SEXP, SEXP, SEXP, SEXP); extern SEXP R_affx_cdf_isPm(SEXP, SEXP, SEXP); extern SEXP R_affx_cdf_nbrOfCellsPerUnitGroup(SEXP, SEXP, SEXP); extern SEXP R_affx_get_bpmap_file(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_affx_get_bpmap_header(SEXP); extern SEXP R_affx_get_bpmap_seqinfo(SEXP, SEXP, SEXP); extern SEXP R_affx_get_cdf_cell_indices(SEXP, SEXP, SEXP); extern SEXP R_affx_get_cdf_file(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_affx_get_cdf_file_header(SEXP); extern SEXP R_affx_get_cdf_file_qc(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_affx_get_cdf_unit_names(SEXP, SEXP, SEXP); extern SEXP R_affx_get_cdf_units(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_affx_get_cel_file(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP R_affx_get_cel_file_header(SEXP); extern SEXP R_affx_get_chp_file(SEXP, SEXP); extern SEXP R_affx_get_clf_file(SEXP, SEXP, SEXP); extern SEXP R_affx_get_pgf_file(SEXP, SEXP, SEXP, SEXP); extern SEXP R_affx_write_bpmap_file(SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"R_affx_cdf_groupNames", (DL_FUNC) &R_affx_cdf_groupNames, 4}, {"R_affx_cdf_isPm", (DL_FUNC) &R_affx_cdf_isPm, 3}, {"R_affx_cdf_nbrOfCellsPerUnitGroup", (DL_FUNC) &R_affx_cdf_nbrOfCellsPerUnitGroup, 3}, {"R_affx_get_bpmap_file", (DL_FUNC) &R_affx_get_bpmap_file, 12}, {"R_affx_get_bpmap_header", (DL_FUNC) &R_affx_get_bpmap_header, 1}, 
{"R_affx_get_bpmap_seqinfo", (DL_FUNC) &R_affx_get_bpmap_seqinfo, 3}, {"R_affx_get_cdf_cell_indices", (DL_FUNC) &R_affx_get_cdf_cell_indices, 3}, {"R_affx_get_cdf_file", (DL_FUNC) &R_affx_get_cdf_file, 15}, {"R_affx_get_cdf_file_header", (DL_FUNC) &R_affx_get_cdf_file_header, 1}, {"R_affx_get_cdf_file_qc", (DL_FUNC) &R_affx_get_cdf_file_qc, 10}, {"R_affx_get_cdf_unit_names", (DL_FUNC) &R_affx_get_cdf_unit_names, 3}, {"R_affx_get_cdf_units", (DL_FUNC) &R_affx_get_cdf_units, 9}, {"R_affx_get_cel_file", (DL_FUNC) &R_affx_get_cel_file, 11}, {"R_affx_get_cel_file_header", (DL_FUNC) &R_affx_get_cel_file_header, 1}, {"R_affx_get_chp_file", (DL_FUNC) &R_affx_get_chp_file, 2}, {"R_affx_get_clf_file", (DL_FUNC) &R_affx_get_clf_file, 3}, {"R_affx_get_pgf_file", (DL_FUNC) &R_affx_get_pgf_file, 4}, {"R_affx_write_bpmap_file", (DL_FUNC) &R_affx_write_bpmap_file, 3}, {NULL, NULL, 0} }; void R_init_affxparser(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } affxparser/src/Makevars0000644000175200017520000001361514516003651016227 0ustar00biocbuildbiocbuild## -Wno-unused-private-field gives notes/errors with some compiler MYCXXFLAGS = -Wno-sign-compare -O0 %.o: %.cpp $(CXX) $(ALL_CPPFLAGS) $(ALL_CXXFLAGS) $(MYCXXFLAGS) -c $< -o $@ %.o: %.c $(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) -c $< -o $@ FUSION_SDK = fusion PKG_CPPFLAGS = \ -I. 
\ -I$(FUSION_SDK)/calvin_files/array/src\ -I$(FUSION_SDK)/calvin_files/data/src\ -I$(FUSION_SDK)/calvin_files/exception/src\ -I$(FUSION_SDK)/calvin_files/fusion/src\ -I$(FUSION_SDK)/calvin_files/fusion/src/GCOSAdapter\ -I$(FUSION_SDK)/calvin_files/fusion/src/CalvinAdapter\ -I$(FUSION_SDK)/calvin_files/parameter/src\ -I$(FUSION_SDK)/calvin_files/parsers/src\ -I$(FUSION_SDK)/calvin_files/portability/src\ -I$(FUSION_SDK)/calvin_files/template/src\ -I$(FUSION_SDK)/calvin_files/utils/src\ -I$(FUSION_SDK)/calvin_files/writers/src\ -I$(FUSION_SDK)/file\ -I$(FUSION_SDK)/file/TsvFile\ -I$(FUSION_SDK)/portability\ -I$(FUSION_SDK)/util\ -I$(FUSION_SDK)\ -D_USE_MEM_MAPPING_ PKG_CPP_SOURCES = \ $(FUSION_SDK)/calvin_files/data/src/CDFData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFProbeGroupInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFProbeInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFProbeSetInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFQCProbeInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFQCProbeSetInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CELData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPBackgroundZone.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPExpressionEntry.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPMultiDataData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPTilingData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPQuantificationData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPQuantificationDetectionData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPGenotypeEntry.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPUniversalEntry.cpp\ $(FUSION_SDK)/calvin_files/data/src/ColumnInfo.cpp\ $(FUSION_SDK)/calvin_files/data/src/DataGroup.cpp\ $(FUSION_SDK)/calvin_files/data/src/DataGroupHeader.cpp\ $(FUSION_SDK)/calvin_files/data/src/DataSet.cpp\ $(FUSION_SDK)/calvin_files/data/src/DataSetHeader.cpp\ $(FUSION_SDK)/calvin_files/data/src/FileHeader.cpp\ 
$(FUSION_SDK)/calvin_files/data/src/GenericData.cpp\ $(FUSION_SDK)/calvin_files/data/src/GenericDataHeader.cpp\ $(FUSION_SDK)/calvin_files/exception/src/ExceptionBase.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/CalvinAdapter/CalvinCELDataAdapter.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/CalvinAdapter/CalvinCHPDataAdapter.cpp \ $(FUSION_SDK)/calvin_files/fusion/src/FusionBPMAPData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCDFData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCDFQCProbeSetNames.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCELData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPData.cpp \ $(FUSION_SDK)/calvin_files/fusion/src/FusionProbeSetResults.cpp \ $(FUSION_SDK)/calvin_files/fusion/src/GCOSAdapter/GCOSCELDataAdapter.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/GCOSAdapter/GCOSCHPDataAdapter.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPLegacyData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPMultiDataAccessor.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPMultiDataData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPTilingData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPGenericData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPQuantificationData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPQuantificationDetectionData.cpp\ $(FUSION_SDK)/calvin_files/parameter/src/ParameterNameValueType.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CDFFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CelFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPFileReader.cpp \ $(FUSION_SDK)/calvin_files/parsers/src/CHPMultiDataFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPTilingFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPQuantificationFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPQuantificationDetectionFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/DataGroupHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/DataGroupReader.cpp\ 
$(FUSION_SDK)/calvin_files/parsers/src/DataSetHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/DataSetReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/FileHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/FileInput.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/GenericDataHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/GenericFileReader.cpp\ $(FUSION_SDK)/calvin_files/utils/src/AffymetrixGuid.cpp\ $(FUSION_SDK)/calvin_files/utils/src/DateTime.cpp\ $(FUSION_SDK)/calvin_files/utils/src/FileUtils.cpp\ $(FUSION_SDK)/calvin_files/utils/src/StringUtils.cpp\ $(FUSION_SDK)/calvin_files/utils/src/checksum.cpp\ $(FUSION_SDK)/file/BPMAPFileData.cpp\ $(FUSION_SDK)/file/BPMAPFileWriter.cpp\ $(FUSION_SDK)/file/CDFFileData.cpp\ $(FUSION_SDK)/file/CELFileData.cpp\ $(FUSION_SDK)/file/CHPFileData.cpp\ $(FUSION_SDK)/file/FileIO.cpp\ $(FUSION_SDK)/file/FileWriter.cpp\ $(FUSION_SDK)/file/TsvFile/ClfFile.cpp\ $(FUSION_SDK)/file/TsvFile/PgfFile.cpp\ $(FUSION_SDK)/file/TsvFile/TsvFile.cpp\ $(FUSION_SDK)/util/AffxByteArray.cpp\ $(FUSION_SDK)/util/AffxConv.cpp\ $(FUSION_SDK)/util/MsgStream.cpp\ $(FUSION_SDK)/util/Util.cpp\ $(FUSION_SDK)/util/Err.cpp\ $(FUSION_SDK)/util/Fs.cpp\ $(FUSION_SDK)/util/Verbose.cpp\ $(FUSION_SDK)/util/RowFile.cpp\ $(FUSION_SDK)/util/TableFile.cpp\ $(FUSION_SDK)/util/Convert.cpp\ R_affx_cel_parser.cpp\ R_affx_cdf_parser.cpp\ R_affx_cdf_extras.cpp\ R_affx_bpmap_parser.cpp\ R_affx_clf_pgf_parser.cpp\ R_affx_chp_parser.cpp PKG_C_SOURCES = \ 000.init.c PKG_SOURCES=$(PKG_CPP_SOURCES) $(PKG_C_SOURCES) CPP_OBJECTS=$(PKG_CPP_SOURCES:.cpp=.o) C_OBJECTS=$(PKG_C_SOURCES:.c=.o) OBJECTS=$(CPP_OBJECTS) $(C_OBJECTS) all: myclean $(SHLIB) myclean: rm -f $(OBJECTS) affxparser/src/Makevars.win0000644000175200017520000001370014516003651017016 0ustar00biocbuildbiocbuildPKG_LIBS = -lws2_32 ## -Wno-unused-private-field gives notes/errors with some compiler MYCXXFLAGS = -Wno-sign-compare -Wno-unknown-pragmas %.o: %.cpp $(CXX) $(ALL_CPPFLAGS) $(ALL_CXXFLAGS) 
$(MYCXXFLAGS) -c $< -o $@ %.o: %.c $(CC) $(ALL_CPPFLAGS) $(ALL_CFLAGS) -c $< -o $@ FUSION_SDK = fusion PKG_CPPFLAGS = \ -I. \ -I$(FUSION_SDK)/calvin_files/array/src\ -I$(FUSION_SDK)/calvin_files/data/src\ -I$(FUSION_SDK)/calvin_files/exception/src\ -I$(FUSION_SDK)/calvin_files/fusion/src\ -I$(FUSION_SDK)/calvin_files/fusion/src/GCOSAdapter\ -I$(FUSION_SDK)/calvin_files/fusion/src/CalvinAdapter\ -I$(FUSION_SDK)/calvin_files/parameter/src\ -I$(FUSION_SDK)/calvin_files/parsers/src\ -I$(FUSION_SDK)/calvin_files/portability/src\ -I$(FUSION_SDK)/calvin_files/template/src\ -I$(FUSION_SDK)/calvin_files/utils/src\ -I$(FUSION_SDK)/calvin_files/writers/src\ -I$(FUSION_SDK)/file\ -I$(FUSION_SDK)/file/TsvFile\ -I$(FUSION_SDK)/portability\ -I$(FUSION_SDK)/util\ -I$(FUSION_SDK)\ -D_USE_MEM_MAPPING_\ -D_MSC_VER PKG_CPP_SOURCES = \ $(FUSION_SDK)/calvin_files/data/src/CDFData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFProbeGroupInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFProbeInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFProbeSetInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFQCProbeInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CDFQCProbeSetInformation.cpp\ $(FUSION_SDK)/calvin_files/data/src/CELData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPBackgroundZone.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPExpressionEntry.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPMultiDataData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPTilingData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPQuantificationData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPQuantificationDetectionData.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPGenotypeEntry.cpp\ $(FUSION_SDK)/calvin_files/data/src/CHPUniversalEntry.cpp\ $(FUSION_SDK)/calvin_files/data/src/ColumnInfo.cpp\ $(FUSION_SDK)/calvin_files/data/src/DataGroup.cpp\ $(FUSION_SDK)/calvin_files/data/src/DataGroupHeader.cpp\ $(FUSION_SDK)/calvin_files/data/src/DataSet.cpp\ 
$(FUSION_SDK)/calvin_files/data/src/DataSetHeader.cpp\ $(FUSION_SDK)/calvin_files/data/src/FileHeader.cpp\ $(FUSION_SDK)/calvin_files/data/src/GenericData.cpp\ $(FUSION_SDK)/calvin_files/data/src/GenericDataHeader.cpp\ $(FUSION_SDK)/calvin_files/exception/src/ExceptionBase.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/CalvinAdapter/CalvinCELDataAdapter.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/CalvinAdapter/CalvinCHPDataAdapter.cpp \ $(FUSION_SDK)/calvin_files/fusion/src/FusionBPMAPData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCDFData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCDFQCProbeSetNames.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCELData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPData.cpp \ $(FUSION_SDK)/calvin_files/fusion/src/FusionProbeSetResults.cpp \ $(FUSION_SDK)/calvin_files/fusion/src/GCOSAdapter/GCOSCELDataAdapter.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/GCOSAdapter/GCOSCHPDataAdapter.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPLegacyData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPMultiDataAccessor.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPMultiDataData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPTilingData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPGenericData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPQuantificationData.cpp\ $(FUSION_SDK)/calvin_files/fusion/src/FusionCHPQuantificationDetectionData.cpp\ $(FUSION_SDK)/calvin_files/parameter/src/ParameterNameValueType.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CDFFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CelFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPFileReader.cpp \ $(FUSION_SDK)/calvin_files/parsers/src/CHPMultiDataFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPTilingFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPQuantificationFileReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/CHPQuantificationDetectionFileReader.cpp\ 
$(FUSION_SDK)/calvin_files/parsers/src/DataGroupHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/DataGroupReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/DataSetHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/DataSetReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/FileHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/FileInput.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/GenericDataHeaderReader.cpp\ $(FUSION_SDK)/calvin_files/parsers/src/GenericFileReader.cpp\ $(FUSION_SDK)/calvin_files/utils/src/AffymetrixGuid.cpp\ $(FUSION_SDK)/calvin_files/utils/src/DateTime.cpp\ $(FUSION_SDK)/calvin_files/utils/src/FileUtils.cpp\ $(FUSION_SDK)/calvin_files/utils/src/StringUtils.cpp\ $(FUSION_SDK)/calvin_files/utils/src/checksum.cpp\ $(FUSION_SDK)/file/BPMAPFileData.cpp\ $(FUSION_SDK)/file/BPMAPFileWriter.cpp\ $(FUSION_SDK)/file/CDFFileData.cpp\ $(FUSION_SDK)/file/CELFileData.cpp\ $(FUSION_SDK)/file/CHPFileData.cpp\ $(FUSION_SDK)/file/FileIO.cpp\ $(FUSION_SDK)/file/FileWriter.cpp\ $(FUSION_SDK)/file/TsvFile/ClfFile.cpp\ $(FUSION_SDK)/file/TsvFile/PgfFile.cpp\ $(FUSION_SDK)/file/TsvFile/TsvFile.cpp\ $(FUSION_SDK)/util/AffxByteArray.cpp\ $(FUSION_SDK)/util/AffxConv.cpp\ $(FUSION_SDK)/util/MsgStream.cpp\ $(FUSION_SDK)/util/Util.cpp\ $(FUSION_SDK)/util/Err.cpp\ $(FUSION_SDK)/util/Fs.cpp\ $(FUSION_SDK)/util/Verbose.cpp\ $(FUSION_SDK)/util/RowFile.cpp\ $(FUSION_SDK)/util/TableFile.cpp\ $(FUSION_SDK)/util/Convert.cpp\ R_affx_cel_parser.cpp\ R_affx_cdf_parser.cpp\ R_affx_cdf_extras.cpp\ R_affx_bpmap_parser.cpp\ R_affx_clf_pgf_parser.cpp\ R_affx_chp_parser.cpp PKG_C_SOURCES = \ 000.init.c PKG_SOURCES=$(PKG_CPP_SOURCES) $(PKG_C_SOURCES) CPP_OBJECTS=$(PKG_CPP_SOURCES:.cpp=.o) C_OBJECTS=$(PKG_C_SOURCES:.c=.o) OBJECTS=$(CPP_OBJECTS) $(C_OBJECTS) all: myclean $(SHLIB) myclean: rm -f $(OBJECTS) affxparser/src/RAffxErrHandler.h0000644000175200017520000000143414516003651017655 0ustar00biocbuildbiocbuild#ifndef RAFFXERRHANDLER_H #define RAFFXERRHANDLER_H #include 
#include "ErrHandler.h" #include "Verbose.h" #include "Except.h" /** * @brief Basic R_affx exception-forwarding error handler */ class RAffxErrHandler : public ErrHandler { public: RAffxErrHandler(bool doThrow) { m_Throw = doThrow; } virtual ~RAffxErrHandler() {} /** * Forward the exception, presumably back to R code. * * @param msg - about what went wrong. */ virtual void handleError(const std::string &msg) { if(m_Throw) { throw Except(msg); } else { Verbose::warn(0, msg); exit(1); } } private: bool m_Throw; ///< throw an exception or abort()? }; #endif /* RAFFXERRHANDLER_H */ affxparser/src/R_affx_bpmap_parser.cpp0000644000175200017520000005750614516003651021206 0ustar00biocbuildbiocbuild#include "BPMAPFileData.h" #include "BPMAPFileWriter.h" // #include "FusionBPMAPData.h" #include "R_affx_constants.h" using namespace std; /* ToDo: Implement partial reading */ /* Need to take care of mmx, mmy, when we have a pmonly */ #include #include #include #include extern "C" { /************************************************************** * * R_affx_get_bpmap_file() * *************************************************************/ SEXP R_affx_get_bpmap_header(SEXP fname){ affxbpmap::CBPMAPFileData bpmap; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Process arguments * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ const char* bpmapFileName = CHAR(STRING_ELT(fname,0)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens stuff * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ bpmap.SetFileName(bpmapFileName); if (bpmap.ReadHeader() == false) { error("Unable to read file: %s\n", bpmapFileName); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Define Return Object: returnList, returnListNames * * We also read the object. 
* - - - - - - - - - - - - - - - - - - - - - - - - - - - */ SEXP returnList, returnListNames, tmp; int kk =0; PROTECT(returnList = NEW_LIST(2)); PROTECT(returnListNames = NEW_CHARACTER(2)); SET_STRING_ELT(returnListNames, kk, mkChar("version")); tmp = NEW_NUMERIC(1); SET_VECTOR_ELT(returnList, kk++, tmp); REAL(tmp)[0] = bpmap.GetVersion(); SET_STRING_ELT(returnListNames, kk, mkChar("numSequences")); tmp = NEW_INTEGER(1); SET_VECTOR_ELT(returnList, kk++, tmp); INTEGER(tmp)[0] = bpmap.GetNumberSequences(); setAttrib(returnList, R_NamesSymbol, returnListNames); bpmap.Close(); UNPROTECT(2); return returnList; } /* R_affx_get_bpmap_header */ /************************************************************** * * R_affx_bpmap_seqinfo_item() * * An internal function * *************************************************************/ SEXP R_affx_bpmap_seqinfo_item(affxbpmap::CGDACSequenceItem &seq, int i_verboseFlag){ if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf(" Reading seqInfo\n"); } SEXP seqInfo, seqInfoNames, seqInfoParameters, seqInfoParameterNames; int kk = 0; string str; int str_length; char* cstr; PROTECT(seqInfo = NEW_LIST(8)); PROTECT(seqInfoNames = NEW_CHARACTER(8)); SET_STRING_ELT(seqInfoNames, kk, mkChar("name")); str = seq.GetName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(seqInfo, kk++, ScalarString(mkChar(cstr))); Free(cstr); SET_STRING_ELT(seqInfoNames, kk, mkChar("groupname")); str = seq.GroupName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(seqInfo, kk++, ScalarString(mkChar(cstr))); Free(cstr); SET_STRING_ELT(seqInfoNames, kk, mkChar("fullname")); str = seq.FullName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(seqInfo, kk++, ScalarString(mkChar(cstr))); 
Free(cstr); SET_STRING_ELT(seqInfoNames, kk, mkChar("version")); str = seq.GetSeqVersion(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(seqInfo, kk++, ScalarString(mkChar(cstr))); Free(cstr); SET_STRING_ELT(seqInfoNames, kk, mkChar("mapping")); if(seq.GetProbeMapping() == 0) SET_VECTOR_ELT(seqInfo, kk++, ScalarString(mkChar("pmmm"))); else SET_VECTOR_ELT(seqInfo, kk++, ScalarString(mkChar("onlypm"))); SET_STRING_ELT(seqInfoNames, kk, mkChar("number")); SET_VECTOR_ELT(seqInfo, kk++, ScalarInteger(seq.GetNumber() + 1)); SET_STRING_ELT(seqInfoNames, kk, mkChar("numberOfHits")); SET_VECTOR_ELT(seqInfo, kk++, ScalarInteger(seq.GetNumberHits())); /* Now we read the parameters, which is basically * a long character vector of different parameter * values */ int nParameters = seq.GetNumberParameters(); if(nParameters > 0) { if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf(" Reading %d seqInfoParameters\n", nParameters); } PROTECT(seqInfoParameters = NEW_CHARACTER(nParameters)); PROTECT(seqInfoParameterNames = NEW_CHARACTER(nParameters)); for(int j = 0; j < nParameters; j++) { str = seq.GetParameter(j).Tag; str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(seqInfoParameterNames, j, mkChar(cstr)); Free(cstr); str = seq.GetParameter(j).Value; cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(seqInfoParameters, j, mkChar(cstr)); Free(cstr); } setAttrib(seqInfoParameters, R_NamesSymbol, seqInfoParameterNames); SET_VECTOR_ELT(seqInfo, 7, seqInfoParameters); UNPROTECT(2); } else { SET_VECTOR_ELT(seqInfo, 7, R_NilValue); } SET_STRING_ELT(seqInfoNames, 7, mkChar("parameters")); setAttrib(seqInfo, R_NamesSymbol, seqInfoNames); UNPROTECT(2); return seqInfo; } /* R_affx_bpmap_seqinfo_item */ 
/************************************************************** * * R_affx_get_bpmap_seqinfo() * *************************************************************/ SEXP R_affx_get_bpmap_seqinfo(SEXP fname, SEXP seqindices, SEXP verbose){ affxbpmap::CBPMAPFileData bpmap; string str; int str_length; char* cstr; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Process arguments * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ const char* bpmapFileName = CHAR(STRING_ELT(fname,0)); int i_verboseFlag = INTEGER(verbose)[0]; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens stuff * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("attempting to read: %s\n", bpmapFileName); } bpmap.SetFileName(bpmapFileName); if (bpmap.Exists() == false) { error("File does not exist: %s\n", bpmapFileName); } if (bpmap.Read() == false) { error("Unable to read file: %s, is it a BPMAP file?\n", bpmapFileName); } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("sucessfully read: %s\n", bpmapFileName); } /* checking whether or not we are reading everything or just * some of the sequences. 
We assume that R delivers the * indices in a sorted way */ int nSequenceIndices = length(seqindices); int nSequences = bpmap.GetNumberSequences(); bool readAllSequences = false; if(nSequenceIndices == 0) { readAllSequences = true; } else { /* and some error checking of the argument */ for(int i = 0; i < nSequenceIndices; i++){ if((INTEGER(seqindices)[i] < 0) || (INTEGER(seqindices)[i] > nSequences)) error("seqIndices out of range"); } nSequences = nSequenceIndices; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Define Return Object: returnList, returnListNames * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ SEXP returnList, returnListNames; PROTECT(returnList = NEW_LIST(nSequences)); PROTECT(returnListNames = NEW_CHARACTER(nSequences)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Read the sequences * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ affxbpmap::CGDACSequenceItem seq; for(int i = 0; i < nSequences; i++){ if(readAllSequences == true) { bpmap.GetSequenceItem(i, seq); } else { /* indices in Fusion are zero-based */ bpmap.GetSequenceItem(INTEGER(seqindices)[i] - 1, seq); } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Reading sequence object: %s\n", seq.GetName().c_str()); } SET_VECTOR_ELT(returnList, i, R_affx_bpmap_seqinfo_item(seq, i_verboseFlag)); str = seq.FullName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(returnListNames, i, mkChar(cstr)); Free(cstr); } setAttrib(returnList, R_NamesSymbol, returnListNames); bpmap.Close(); UNPROTECT(2); return returnList; } /* R_affx_get_bpmap_seqinfo */ /************************************************************** * * R_affx_get_bpmap_file() * *************************************************************/ SEXP R_affx_get_bpmap_file(SEXP fname, SEXP seqindices, SEXP readSeqInfo, SEXP readStartPos, SEXP readCenterPos, SEXP readProbeSeq, SEXP readStrand, SEXP 
readPMXY, SEXP readMMXY, SEXP readMatchScore, SEXP readProbeLength, SEXP verbose) { affxbpmap::CBPMAPFileData bpmap; string str; int str_length; char* cstr; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Process arguments * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ const char* bpmapFileName = CHAR(STRING_ELT(fname,0)); int i_readSeqInfo = INTEGER(readSeqInfo)[0]; int i_readStartPos = INTEGER(readStartPos)[0]; int i_readCenterPos = INTEGER(readCenterPos)[0]; int i_readProbeSeq = INTEGER(readProbeSeq)[0]; int i_readStrand = INTEGER(readStrand)[0]; int i_readPMXY = INTEGER(readPMXY)[0]; int i_readMMXY = INTEGER(readMMXY)[0]; int i_readMatchScore = INTEGER(readMatchScore)[0]; int i_readProbeLength = INTEGER(readProbeLength)[0]; int i_verboseFlag = INTEGER(verbose)[0]; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens stuff * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("attempting to read: %s\n", bpmapFileName); } bpmap.SetFileName(bpmapFileName); if (bpmap.Exists() == false) { error("File does not exist: %s\n", bpmapFileName); } if (bpmap.Read() == false) { error("Unable to read file: %s, is it a BPMAP file?\n", bpmapFileName); } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("sucessfully read: %s\n", bpmapFileName); } /* checking whether or not we are reading everything or just * some of the sequences. 
We assume that R delivers the * indices in a sorted way */ int nSequenceIndices = length(seqindices); int nSequences = bpmap.GetNumberSequences(); bool readAllSequences = false; if(nSequenceIndices == 0) { readAllSequences = true; } else { /* and some error checking of the argument */ for(int i = 0; i < nSequenceIndices; i++){ if((INTEGER(seqindices)[i] < 0) || (INTEGER(seqindices)[i] > nSequences)) error("seqIndices out of range"); } nSequences = nSequenceIndices; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Define Return Object: returnList, returnListNames * * In this case, each component of resultList is call * seqObj, which holds the relevant components * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ SEXP resultList, resultListNames; PROTECT(resultList = NEW_LIST(nSequences)); PROTECT(resultListNames = NEW_CHARACTER(nSequences)); SEXP seqObj, seqObjNames; /* slots in seqObj */ SEXP pmx, pmy, mmx, mmy, probeseq, strand, probelength, matchscore, startpos, centerpos; affxbpmap::CGDACSequenceItem seq; affxbpmap::GDACSequenceHitItemType seqHit; int seqObjLength = i_readSeqInfo + i_readStartPos + i_readCenterPos + i_readProbeSeq + i_readStrand + 2 * i_readPMXY + 2 * i_readMMXY + i_readMatchScore + i_readProbeLength; int protectCount = 0; int kk = 0; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Read the sequences. 
* - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for(int i = 0; i < nSequences; i++) { /* Initializing memory, we free it at the end of each sequence */ protectCount = 0; kk = 0; PROTECT(seqObj = NEW_LIST(seqObjLength)); protectCount++; PROTECT(seqObjNames = NEW_CHARACTER(seqObjLength)); protectCount++; if(readAllSequences) { bpmap.GetSequenceItem(i, seq); } else { /* Fusion indices are zero base */ bpmap.GetSequenceItem(INTEGER(seqindices)[i] - 1, seq); } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Reading sequence object: %s\n", seq.GetName().c_str()); } if(i_readSeqInfo) { SET_VECTOR_ELT(seqObj, kk, R_affx_bpmap_seqinfo_item(seq, i_verboseFlag)); SET_STRING_ELT(seqObjNames, kk++, mkChar("seqInfo")); } int nHits = seq.GetNumberHits(); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf(" Allocating memory for %d hits\n", nHits); } int onlyPM = seq.GetProbeMapping(); if (i_readPMXY) { PROTECT(pmx = NEW_INTEGER(nHits)); protectCount++; PROTECT(pmy = NEW_INTEGER(nHits)); protectCount++; } if (i_readMMXY and !onlyPM) { PROTECT(mmx = NEW_INTEGER(nHits)); protectCount++; PROTECT(mmy = NEW_INTEGER(nHits)); protectCount++; } if (i_readMatchScore) { PROTECT(matchscore = NEW_NUMERIC(nHits)); protectCount++; } if (i_readStrand) { PROTECT(strand = NEW_INTEGER(nHits)); protectCount++; } if (i_readProbeLength) { PROTECT(probelength = NEW_INTEGER(nHits)); protectCount++; } if (i_readStartPos) { PROTECT(startpos = NEW_INTEGER(nHits)); protectCount++; } if (i_readCenterPos) { PROTECT(centerpos = NEW_INTEGER(nHits)); protectCount++; } if (i_readProbeSeq) { PROTECT(probeseq = NEW_CHARACTER(nHits)); protectCount++; } if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Reading hit number:\n"); } for(int j = 0; j < nHits; j++) { if(j % 1000 == 999) { R_CheckUserInterrupt(); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf(" %d\n", j + 1); } } seq.GetHitItem(j, seqHit, i_readProbeSeq); if (i_readPMXY) { INTEGER(pmx)[j] = seqHit.PMX; INTEGER(pmy)[j] = seqHit.PMY; 
} if (i_readMMXY && !onlyPM) { INTEGER(mmx)[j] = seqHit.MMX; INTEGER(mmy)[j] = seqHit.MMY; } if (i_readStrand) { INTEGER(strand)[j] = (unsigned int)seqHit.TopStrand; } if (i_readProbeSeq) { str = seqHit.PMProbe; str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(probeseq, j, mkChar(cstr)); Free(cstr); } if (i_readStartPos) { INTEGER(startpos)[j] = seqHit.getStartPosition(); } if (i_readCenterPos) { INTEGER(centerpos)[j] = seqHit.getCenterPosition(); } if (i_readMatchScore) { REAL(matchscore)[j] = (double)seqHit.MatchScore; } if (i_readProbeLength) { INTEGER(probelength)[j] = (int)seqHit.ProbeLength; } } /* Now it is time to finalize the seqObj */ if (i_readPMXY) { SET_VECTOR_ELT(seqObj, kk, pmx); SET_STRING_ELT(seqObjNames, kk++, mkChar("pmx")); SET_VECTOR_ELT(seqObj, kk, pmy); SET_STRING_ELT(seqObjNames, kk++, mkChar("pmy")); } if (i_readMMXY) { if(!onlyPM) { SET_VECTOR_ELT(seqObj, kk, mmx); SET_STRING_ELT(seqObjNames, kk++, mkChar("mmx")); SET_VECTOR_ELT(seqObj, kk, mmy); SET_STRING_ELT(seqObjNames, kk++, mkChar("mmy")); } else { SET_VECTOR_ELT(seqObj, kk, R_NilValue); SET_STRING_ELT(seqObjNames, kk++, mkChar("mmx")); SET_VECTOR_ELT(seqObj, kk, R_NilValue); SET_STRING_ELT(seqObjNames, kk++, mkChar("mmy")); } } if (i_readProbeSeq) { SET_VECTOR_ELT(seqObj, kk, probeseq); SET_STRING_ELT(seqObjNames, kk++, mkChar("probeseq")); } if (i_readStrand) { SET_VECTOR_ELT(seqObj, kk, strand); SET_STRING_ELT(seqObjNames, kk++, mkChar("strand")); } if (i_readStartPos) { SET_VECTOR_ELT(seqObj, kk, startpos); SET_STRING_ELT(seqObjNames, kk++, mkChar("startpos")); } if (i_readCenterPos) { SET_VECTOR_ELT(seqObj, kk, centerpos); SET_STRING_ELT(seqObjNames, kk++, mkChar("centerpos")); } if (i_readProbeLength) { SET_VECTOR_ELT(seqObj, kk, probelength); SET_STRING_ELT(seqObjNames, kk++, mkChar("probelength")); } if (i_readMatchScore) { SET_VECTOR_ELT(seqObj, kk, matchscore); 
SET_STRING_ELT(seqObjNames, kk++, mkChar("matchscore")); } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Finalizing sequence %s\n", seq.FullName().c_str()); } setAttrib(seqObj, R_NamesSymbol, seqObjNames); SET_VECTOR_ELT(resultList, i, seqObj); str = seq.FullName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(resultListNames, i, mkChar(cstr)); Free(cstr); UNPROTECT(protectCount); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Return stuff * - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Finalizing BPMAP object\n"); } bpmap.Close(); setAttrib(resultList, R_NamesSymbol, resultListNames); UNPROTECT(2); return resultList; }/* R_affx_get_bpmap_file() */ /********************************************************* ********************************************************* File Writers ********************************************************* ********************************************************/ SEXP R_affx_write_bpmap_file(SEXP bpname, SEXP tpname, SEXP verbose){ const char* bpmapFileName = CHAR(STRING_ELT(bpname,0)); const char* tpmapFileName = CHAR(STRING_ELT(tpname,0)); affxbpmapwriter::CBPMAPFileWriter bpmap; bpmap.SetTpmapFileName(tpmapFileName); int i_verboseFlag = INTEGER(verbose)[0]; if(bpmap.TpmapExists() == false){ error("tpmap file %s does not exist.\n", tpmapFileName); } bpmap.SetTpmapFileName(tpmapFileName); if(i_verboseFlag >= R_AFFX_VERBOSE){ Rprintf("Reading tpmap file: %s\n", tpmapFileName); } if(bpmap.ReadTpmap() == false){ error("Tpmap file %s cannot be read\n", tpmapFileName); } if(i_verboseFlag >= R_AFFX_VERBOSE){ Rprintf("Writing bpmap file %s\n", bpmapFileName); } bpmap.SetFileName(bpmapFileName); if(bpmap.WriteBpmap() == false){ error("Bpmap file %s could not be written\n", bpmapFileName); } return R_NilValue; } /** end R_affx_write_bpmap_file **/ } /** end extern "C" 
**/ affxparser/src/R_affx_cdf_extras.cpp0000644000175200017520000004106014516003651020641 0ustar00biocbuildbiocbuild#include "FusionCDFData.h" #include #include "R_affx_constants.h" using namespace std; using namespace affymetrix_fusion_io; #include #include #include #include extern "C" { /************************************************************************ * * R_affx_cdf_nbrOfCellsPerUnitGroup() * ************************************************************************/ SEXP R_affx_cdf_nbrOfCellsPerUnitGroup(SEXP fname, SEXP units, SEXP verbose) { FusionCDFData cdf; FusionCDFFileHeader header; string str; int str_length; char* cstr; SEXP names = R_NilValue, probe_sets = R_NilValue, r_groups = R_NilValue, r_group_names = R_NilValue; bool readAll = true; int nsets = 0, nunits = 0; int iset = 0; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_verboseFlag = INTEGER(verbose)[0]; FusionCDFProbeSetInformation probeset; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens file * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Read header and get unit indices to be read * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ header = cdf.GetHeader(); nsets = header.GetNumProbeSets(); nunits = length(units); if (nunits == 0) { nunits = nsets; } else { readAll = false; /* Validate argument 'units': */ for (int ii = 0; ii < nunits; ii++) { iset = INTEGER(units)[ii]; if (iset < 1 || iset > nsets) { error("Argument 'units' contains an element out of range."); } } } /* Allocate R character vector and R list for the names and units */ PROTECT(names = NEW_CHARACTER(nunits)); PROTECT(probe_sets = NEW_LIST(nunits)); /* 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Access information for the requested units one by one * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int ii = 0; ii < nunits; ii++) { if (readAll) { iset = ii; } else { /* Unit indices are zero-based in Fusion SDK. */ iset = INTEGER(units)[ii] - 1; } /* Retrieve the current unit */ cdf.GetProbeSetInformation(iset, probeset); /* Record its name */ str = cdf.GetProbeSetName(iset); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, ii, mkChar(cstr)); Free(cstr); /* Get the number of groups in the unit */ int ngroups = probeset.GetNumGroups(); /* Allocate an R list and a vector of names for the groups */ PROTECT(r_groups = NEW_INTEGER(ngroups)); PROTECT(r_group_names = NEW_CHARACTER(ngroups)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each group, query the number of cells (probes) * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int igroup = 0; igroup < ngroups; igroup++) { FusionCDFProbeGroupInformation group; /* Get the current group */ probeset.GetGroupInformation(igroup, group); /* Get the name of the group */ str = group.GetName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(r_group_names, igroup, mkChar(cstr)); Free(cstr); /* Get the number of cells (probes) in the group */ int ncells = group.GetNumCells(); INTEGER(r_groups)[igroup] = ncells; } /** set the group names. **/ setAttrib(r_groups, R_NamesSymbol, r_group_names); /** now set the probe_set in the main probe_set list. **/ SET_VECTOR_ELT(probe_sets, ii, r_groups); /** pop the group list and group names of the stack. **/ UNPROTECT(2); /* 'r_group_names' and then 'r_groups' */ } /** set the names down here at the end. 
**/ setAttrib(probe_sets, R_NamesSymbol, names); /** unprotect the names and the main probe set list.**/ UNPROTECT(2); /* 'names' and then 'probe_sets' */ return probe_sets; } /************************************************************************ * * R_affx_cdf_groupNames() * ************************************************************************/ SEXP R_affx_cdf_groupNames(SEXP fname, SEXP units, SEXP truncateGroupNames, SEXP verbose) { FusionCDFData cdf; FusionCDFFileHeader header; string str; int str_length; char* cstr; SEXP names = R_NilValue, probe_sets = R_NilValue, r_group_names = R_NilValue; bool readAll = true; int nsets = 0, nunits = 0; int iset = 0; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_truncateGroupNames = INTEGER(truncateGroupNames)[0]; int i_verboseFlag = INTEGER(verbose)[0]; /** pointer to the name of the probeset. **/ char* name; char bfr[512]; FusionCDFProbeSetInformation probeset; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens file * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Read header and get unit indices to be read * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ header = cdf.GetHeader(); nsets = header.GetNumProbeSets(); nunits = length(units); if (nunits == 0) { nunits = nsets; } else { readAll = false; /* Validate argument 'units': */ for (int ii = 0; ii < nunits; ii++) { iset = INTEGER(units)[ii]; if (iset < 1 || iset > nsets) { error("Argument 'units' contains an element out of range."); } } } /* Allocate R character vector and R list for the names and units */ PROTECT(probe_sets = NEW_LIST(nunits)); PROTECT(names = NEW_CHARACTER(nunits)); /* - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Access information for the requested units one by one * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int ii = 0; ii < nunits; ii++) { if (readAll) { iset = ii; } else { /* Unit indices are zero-based in Fusion SDK. */ iset = INTEGER(units)[ii] - 1; } /* Retrieve the current unit */ cdf.GetProbeSetInformation(iset, probeset); /* Record its name */ str = cdf.GetProbeSetName(iset); str_length = str.size(); name = Calloc(str_length+1, char); strncpy(name, str.c_str(), str_length); name[str_length] = '\0'; SET_STRING_ELT(names, ii, mkChar(name)); /* Get the number of groups in the unit */ int ngroups = probeset.GetNumGroups(); /* Allocate a vector of names for the groups */ PROTECT(r_group_names = NEW_CHARACTER(ngroups)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each group, query the number of cells (probes) * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int igroup = 0; igroup < ngroups; igroup++) { FusionCDFProbeGroupInformation group; /* Get the current group */ probeset.GetGroupInformation(igroup, group); /* Get the name of the group */ str = group.GetName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; /* If group name starts with the unit name, and i_truncateGroupNames is TRUE, strip it off. */ int len = strlen(name); int res = strncmp(cstr, name, len); if (res == 0 && i_truncateGroupNames) { int last = strlen(cstr)-len; for (int kk = 0; kk < last; kk++) bfr[kk] = cstr[len+kk]; bfr[last] = '\0'; SET_STRING_ELT(r_group_names, igroup, mkChar(bfr)); } else { SET_STRING_ELT(r_group_names, igroup, mkChar(cstr)); } Free(cstr); } /** now set the probe_set in the main probe_set list. **/ SET_VECTOR_ELT(probe_sets, ii, r_group_names); /** pop the group list and group names of the stack. 
**/ UNPROTECT(1); /* 'r_group_names' */ Free(name); } /* for (int ii=0 ...) */ /** set the names down here at the end. **/ setAttrib(probe_sets, R_NamesSymbol, names); /** unprotect the names and the main probe set list.**/ UNPROTECT(2); /* 'names' and then 'probe_sets' */ return probe_sets; } /* R_affx_cdf_groupNames() */ /************************************************************************ * * R_affx_pt_base_is_pm * * Return 1 if p_base/t_base pair is PM (complementary), 0 otherwise. * ************************************************************************/ int R_affx_pt_base_is_pm(char p_base, char t_base) { int is_pm = 0; /* 0 is false, 1 is true */ if (p_base == t_base) { is_pm = 0; } else { if ((p_base == 'a' || p_base == 'A') && (t_base == 't' || t_base == 'T')) { is_pm = 1; } else if ((p_base == 't' || p_base == 'T') && (t_base == 'a' || t_base == 'A')) { is_pm = 1; } else if ((p_base == 'c' || p_base == 'C') && (t_base == 'g' || t_base == 'G')) { is_pm = 1; } else if ((p_base == 'g' || p_base == 'G') && (t_base == 'c' || t_base == 'C')) { is_pm = 1; } else { is_pm = 0; } } return is_pm; } /* R_affx_pt_base_is_pm() */ /************************************************************************ * * R_affx_cdf_isPm() * ************************************************************************/ SEXP R_affx_cdf_isPm(SEXP fname, SEXP units, SEXP verbose) { FusionCDFData cdf; FusionCDFFileHeader header; string str; int str_length; char* cstr; SEXP names = R_NilValue, probe_sets = R_NilValue, r_groups = R_NilValue, r_group_names = R_NilValue, isPm = R_NilValue; bool readAll = true; int nsets = 0, nunits = 0; int iset = 0; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_verboseFlag = INTEGER(verbose)[0]; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens file * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to 
read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Read header and get unit indices to be read * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ header = cdf.GetHeader(); nsets = header.GetNumProbeSets(); nunits = length(units); if (nunits == 0) { nunits = nsets; } else { readAll = false; /* Validate argument 'units': */ for (int ii = 0; ii < nunits; ii++) { iset = INTEGER(units)[ii]; if (iset < 1 || iset > nsets) { error("Argument 'units' contains an element out of range."); } } } /* Allocate R character vector and R list for the names and units */ PROTECT(probe_sets = NEW_LIST(nunits)); PROTECT(names = NEW_CHARACTER(nunits)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Access information for the requested units one by one * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int ii = 0; ii < nunits; ii++) { FusionCDFProbeSetInformation probeset; if (readAll) { iset = ii; } else { /* Unit indices are zero-based in Fusion SDK. 
*/ iset = INTEGER(units)[ii] - 1; } /* Retrieve the current unit */ cdf.GetProbeSetInformation(iset, probeset); /* Record its name */ str = cdf.GetProbeSetName(iset); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, ii, mkChar(cstr)); Free(cstr); /* Get the number of groups in the unit */ int ngroups = probeset.GetNumGroups(); /* Allocate an R list and a vector of names for the groups */ PROTECT(r_groups = NEW_LIST(ngroups)); PROTECT(r_group_names = NEW_CHARACTER(ngroups)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each group, query the number of cells (probes) * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int igroup = 0; igroup < ngroups; igroup++) { FusionCDFProbeGroupInformation group; /* Get the current group */ probeset.GetGroupInformation(igroup, group); /* Get the name of the group */ str = group.GetName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(r_group_names, igroup, mkChar(cstr)); Free(cstr); /* Get the number of cells (probes) in the group */ int ncells = group.GetNumCells(); PROTECT(isPm = NEW_LOGICAL(ncells)); /* For each cell, record if it is an mis-match probe or not */ for (int icell = 0; icell < ncells; icell++) { FusionCDFProbeInformation probe; group.GetCell(icell, probe); char p_base = probe.GetPBase(); char t_base = probe.GetTBase(); LOGICAL(isPm)[icell] = R_affx_pt_base_is_pm(p_base, t_base); } /* for (int icell ...) */ SET_VECTOR_ELT(r_groups, igroup, isPm); UNPROTECT(1); /* 'isPm' */ } /** set the group names. **/ setAttrib(r_groups, R_NamesSymbol, r_group_names); /** now set the probe_set in the main probe_set list. **/ SET_VECTOR_ELT(probe_sets, ii, r_groups); /** pop the group list and group names of the stack. 
**/ UNPROTECT(2); /* 'r_group_names' and then 'r_groups' */ } /** set the names down here at the end. **/ setAttrib(probe_sets, R_NamesSymbol, names); /** unprotect the names and the main probe set list.**/ UNPROTECT(2); /* 'names' and then 'probe_sets' */ return probe_sets; } /* R_affx_cdf_isPm() */ } /** end extern C **/ /*************************************************************************** * HISTORY: * 2007-03-05 * o Added argument 'truncateGroupNames' to R_affx_cdf_group_names(). * 2006-11-27 * o Added Seth Falcon's help function R_affx_pt_base_is_pm(). * o Made R_affx_cdf_isMm() & R_affx_cdf_isPmOrMm() deprecated, because * they've never been used. /HB * 2006-03-28 * o Unit indices are now one-based. /HB * 2006-01-15 * 2006-01-15 * o It is now possible to specify what readCdfUnits() should return. /HB * 2006-01-12 * o BUG FIX: The check of the upper-limit of unit indicies was done * assuming one-based indices. /HB * o Added R_affx_cdf_isMm(), R_affx_cdf_isPm() & R_affx_cdf_isPmOrMm(). /HB * 2006-01-11 * o Added R_affx_cdf_nbrOfCellsPerUnitGroup() /HB * 2006-01-10 * o Updated the "units" code to be more similar to the corresponding code * for CEL files. /HB * o Added a return value to non-void function R_affx_get_cdf_file_qc(). /HB * 2006-01-09 * o Added R_affx_get_cdf_units() and R_affx_get_cdf_unit.names(). /HB * o Created. The purpose was to make it possible to read subsets of units * and not just all units at once. 
/HB **************************************************************************/ affxparser/src/R_affx_cdf_extras.h0000644000175200017520000000031414516003651020303 0ustar00biocbuildbiocbuild#if !defined(R_AFFX_CDF_EXTRAS_H) #define R_AFFX_CDF_EXTRAS_H extern "C" { /* utility functions that can be used elsewhere in the code */ int R_affx_pt_base_is_pm(char p_base, char t_base); } #endif affxparser/src/R_affx_cdf_parser.cpp0000644000175200017520000017377314516003651020650 0ustar00biocbuildbiocbuild#include "FusionCDFData.h" #include #include "R_affx_constants.h" #include "R_affx_cdf_extras.h" using namespace std; using namespace affymetrix_fusion_io; #include #include #include #include extern "C" { /************************************************************************ * * R_affx_get_pmmm_list() * * This function conforms to the specification for getting an * environment for CEL files. * ************************************************************************/ SEXP R_affx_get_pmmm_list(SEXP fname, SEXP complementary_logic, SEXP verbose) { FusionCDFData cdf; FusionCDFFileHeader header; SEXP names, dim, pmmm, pairs; int nRows = 0; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_verboseFlag = INTEGER(verbose)[0]; string str; int str_length; char* cstr; cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } header = cdf.GetHeader(); int nsets = header.GetNumProbeSets(); PROTECT(pmmm = NEW_LIST(nsets)); PROTECT(names = NEW_CHARACTER(nsets)); nRows = header.GetRows(); for (int iset = 0; iset < nsets; iset++) { str = cdf.GetProbeSetName(iset); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, iset, mkChar(cstr)); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Processing probeset: %s\n", cstr); } Free(cstr); 
FusionCDFProbeSetInformation set; cdf.GetProbeSetInformation(iset, set); int ngroups = set.GetNumGroups(); for (int igroup = 0; igroup < ngroups; igroup++) { FusionCDFProbeGroupInformation group; set.GetGroupInformation(igroup, group); int ncells = group.GetNumCells(); PROTECT(pairs = NEW_INTEGER(ncells)); for (int icell = 0; icell < ncells; icell++) { FusionCDFProbeInformation probe; group.GetCell(icell, probe); int x = probe.GetX(); int y = probe.GetY(); int index = (x + 1) + (nRows * y); INTEGER(pairs)[icell] = index; } /** Set up the dimensions here we are transposed. **/ PROTECT(dim = NEW_INTEGER(2)); INTEGER_POINTER(dim)[1] = ncells/2; INTEGER_POINTER(dim)[0] = 2; SET_DIM(pairs, dim); /** Set the pmmm matrix for this ProbeSet. **/ SET_VECTOR_ELT(pmmm, iset, pairs); /** pop off dimension, and the pairs matrix. **/ UNPROTECT(2); /* 'dim' and then 'pairs' */ } } /** set the names of the list. **/ setAttrib(pmmm, R_NamesSymbol, names); /** pop the names, and the vector. **/ UNPROTECT(2); /* 'names' and then 'pmmm' */ return pmmm; } /************************************************************************ * * R_affx_get_cdf_file_qc() * ************************************************************************/ SEXP R_affx_get_cdf_file_qc(SEXP fname, SEXP unitIndices, SEXP verbose, SEXP returnIndices, SEXP returnXY, SEXP returnLengths, SEXP returnPMInfo, SEXP returnBackgroundInfo, SEXP returnType, SEXP returnQCNumbers) { FusionCDFData cdf; string str; SEXP r_qcunits_list = R_NilValue, r_qcunit = R_NilValue, r_qcunit_names = R_NilValue, r_indices = R_NilValue, r_xvals = R_NilValue, r_yvals = R_NilValue, r_lengths = R_NilValue, r_pminfo = R_NilValue, r_backgroundinfo = R_NilValue; int numQCUnits = 0; int nqccells = 0; int numCols = 0; int numQCUnitsInFile = 0; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_verboseFlag = INTEGER(verbose)[0]; int i_returnIndices = INTEGER(returnIndices)[0]; int i_returnXY = INTEGER(returnXY)[0]; int i_returnLengths = 
INTEGER(returnLengths)[0]; int i_returnPMInfo = INTEGER(returnPMInfo)[0]; int i_returnBackgroundInfo = INTEGER(returnBackgroundInfo)[0]; int i_returnType = INTEGER(returnType)[0]; int i_returnQCNumbers = INTEGER(returnQCNumbers)[0]; bool readEveryUnit = true; int ii = 0; int qcunit_idx; cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } numQCUnitsInFile = cdf.GetHeader().GetNumQCProbeSets(); numCols = cdf.GetHeader().GetCols(); if(length(unitIndices) != 0){ for (int i = 0; i < length(unitIndices); i++) { if(INTEGER(unitIndices)[i] < 1 || INTEGER(unitIndices)[i] > numQCUnitsInFile) error("Argument 'units' contains an element out of range [%d,%d]: %d", 1, numQCUnitsInFile, INTEGER(unitIndices)[i]); } numQCUnits = length(unitIndices); readEveryUnit = false; } else { numQCUnits = numQCUnitsInFile; readEveryUnit = true; } /* ** Now we set up stuff for the different objects we need to return */ FusionCDFQCProbeSetInformation qcunit; FusionCDFQCProbeInformation qcprobe; /* ** First the units themselves */ int numCellArguments = i_returnIndices + 2 * i_returnXY + i_returnLengths + i_returnPMInfo + i_returnBackgroundInfo; int numQCUnitArguments = numCellArguments + i_returnType + i_returnQCNumbers; PROTECT(r_qcunit_names = NEW_CHARACTER(numQCUnitArguments)); ii = 0; if(i_returnXY) { SET_STRING_ELT(r_qcunit_names, ii++, mkChar("x")); SET_STRING_ELT(r_qcunit_names, ii++, mkChar("y")); } if(i_returnIndices) { SET_STRING_ELT(r_qcunit_names, ii++, mkChar("indices")); } if(i_returnLengths) { SET_STRING_ELT(r_qcunit_names, ii++, mkChar("length")); } if(i_returnPMInfo) { SET_STRING_ELT(r_qcunit_names, ii++, mkChar("pm")); } if(i_returnBackgroundInfo) { SET_STRING_ELT(r_qcunit_names, ii++, mkChar("background")); } if(i_returnType) { SET_STRING_ELT(r_qcunit_names, ii++, mkChar("type")); } if(i_returnQCNumbers) { 
SET_STRING_ELT(r_qcunit_names, ii++, mkChar("ncells")); } /* ** Now fixing up the QC type */ SEXP r_qcTypeAsString; PROTECT(r_qcTypeAsString = NEW_CHARACTER(19)); SET_STRING_ELT(r_qcTypeAsString, 0, mkChar("unknown")); SET_STRING_ELT(r_qcTypeAsString, 1, mkChar("checkerboardNegative")); SET_STRING_ELT(r_qcTypeAsString, 2, mkChar("checkerboardPositive")); SET_STRING_ELT(r_qcTypeAsString, 3, mkChar("hybeNegative")); SET_STRING_ELT(r_qcTypeAsString, 4, mkChar("hybePositive")); SET_STRING_ELT(r_qcTypeAsString, 5, mkChar("textFeaturesNegative")); SET_STRING_ELT(r_qcTypeAsString, 6, mkChar("textFeaturesPositive")); SET_STRING_ELT(r_qcTypeAsString, 7, mkChar("centralNegative")); SET_STRING_ELT(r_qcTypeAsString, 8, mkChar("centralPositive")); SET_STRING_ELT(r_qcTypeAsString, 9, mkChar("geneExpNegative")); SET_STRING_ELT(r_qcTypeAsString, 10, mkChar("geneExpPositive")); SET_STRING_ELT(r_qcTypeAsString, 11, mkChar("cycleFidelityNegative")); SET_STRING_ELT(r_qcTypeAsString, 12, mkChar("cycleFidelityPositive")); SET_STRING_ELT(r_qcTypeAsString, 13, mkChar("centralCrossNegative")); SET_STRING_ELT(r_qcTypeAsString, 14, mkChar("centralCrossPositive")); SET_STRING_ELT(r_qcTypeAsString, 15, mkChar("crossHybeNegative")); SET_STRING_ELT(r_qcTypeAsString, 16, mkChar("crossHybePositive")); SET_STRING_ELT(r_qcTypeAsString, 17, mkChar("SpatialNormNegative")); SET_STRING_ELT(r_qcTypeAsString, 18, mkChar("SpatialNormPositive")); PROTECT(r_qcunits_list = NEW_LIST(numQCUnits)); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Finished setup, commencing unit parsing.\n"); } /* ** Parsing the QCUnits */ for (int iqcunit = 0; iqcunit < numQCUnits; iqcunit++) { /* Check for interrupts */ if(iqcunit % 1000 == 999) R_CheckUserInterrupt(); /* The index, which is 0-based in Fusion unlike our R-api */ if(readEveryUnit) qcunit_idx = iqcunit; else qcunit_idx = INTEGER(unitIndices)[iqcunit] - 1; cdf.GetQCProbeSetInformation(qcunit_idx, qcunit); PROTECT(r_qcunit = NEW_LIST(numQCUnitArguments)); if 
(i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Processing QC unit: %d\n", qcunit_idx + 1); } nqccells = qcunit.GetNumCells(); if(numCellArguments > 0) { if(i_returnXY) { PROTECT(r_xvals = NEW_INTEGER(nqccells)); PROTECT(r_yvals = NEW_INTEGER(nqccells)); } if(i_returnIndices) PROTECT(r_indices = NEW_INTEGER(nqccells)); if(i_returnLengths) PROTECT(r_lengths = NEW_INTEGER(nqccells)); if(i_returnPMInfo) PROTECT(r_pminfo = NEW_LOGICAL(nqccells)); if(i_returnBackgroundInfo) PROTECT(r_backgroundinfo = NEW_LOGICAL(nqccells)); for (int icell = 0; icell < nqccells; icell++) { qcunit.GetProbeInformation(icell, qcprobe); if(i_returnXY) { INTEGER(r_xvals)[icell] = qcprobe.GetX(); INTEGER(r_yvals)[icell] = qcprobe.GetY(); } if(i_returnIndices) { INTEGER(r_indices)[icell] = qcprobe.GetY() * numCols + qcprobe.GetX() + 1; } if(i_returnLengths) { INTEGER(r_lengths)[icell] = qcprobe.GetPLen(); } if(i_returnPMInfo) { LOGICAL(r_pminfo)[icell] = qcprobe.IsPerfectMatchProbe(); } if(i_returnBackgroundInfo) { LOGICAL(r_backgroundinfo)[icell] = qcprobe.IsBackgroundProbe(); } } ii = 0; if(i_returnXY) { SET_VECTOR_ELT(r_qcunit, ii++, r_xvals); SET_VECTOR_ELT(r_qcunit, ii++, r_yvals); } if(i_returnIndices) { SET_VECTOR_ELT(r_qcunit, ii++, r_indices); } if(i_returnLengths) { SET_VECTOR_ELT(r_qcunit, ii++, r_lengths); } if(i_returnPMInfo) { SET_VECTOR_ELT(r_qcunit, ii++, r_pminfo); } if(i_returnBackgroundInfo) { SET_VECTOR_ELT(r_qcunit, ii++, r_backgroundinfo); } UNPROTECT(numCellArguments); } if(i_returnType) { switch (qcunit.GetQCProbeSetType()) { case affxcdf::UnknownQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 0))); break; case affxcdf::CheckerboardNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 1))); break; case affxcdf::CheckerboardPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 2))); break; case affxcdf::HybNegativeQCProbeSetType: 
SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 3))); break; case affxcdf::HybPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 4))); break; case affxcdf::TextFeaturesNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 5))); break; case affxcdf::TextFeaturesPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 6))); break; case affxcdf::CentralNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 7))); break; case affxcdf::CentralPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 8))); break; case affxcdf::GeneExpNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 9))); break; case affxcdf::GeneExpPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 10))); break; case affxcdf::CycleFidelityNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 11))); break; case affxcdf::CycleFidelityPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 12))); break; case affxcdf::CentralCrossNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 13))); break; case affxcdf::CentralCrossPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 14))); break; case affxcdf::CrossHybNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 15))); break; case affxcdf::CrossHybPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 16))); break; case affxcdf::SpatialNormalizationNegativeQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 17))); break; case 
affxcdf::SpatialNormalizationPositiveQCProbeSetType: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 18))); break; default: SET_VECTOR_ELT(r_qcunit, ii++, ScalarString(STRING_ELT(r_qcTypeAsString, 0))); break; } } if(i_returnQCNumbers) { SET_VECTOR_ELT(r_qcunit, ii++, ScalarInteger(nqccells)); } setAttrib(r_qcunit, R_NamesSymbol, r_qcunit_names); SET_VECTOR_ELT(r_qcunits_list, iqcunit, r_qcunit); UNPROTECT(1); } /* r_qcunit_names, r_qcunits_list, r_qcTypeAsString */ UNPROTECT(3); return r_qcunits_list; } /************************************************************************ * * R_affx_get_cdf_file_header() * ************************************************************************/ SEXP R_affx_get_cdf_file_header(SEXP fname) { FusionCDFData cdf; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); string str; int str_length; char* cstr; cdf.SetFileName(cdfFileName); if (cdf.ReadHeader() == false) { error("Failed to read the CDF file header for: %s\n", cdfFileName); } SEXP vals = R_NilValue, names = R_NilValue, tmp = R_NilValue; int ii = 0, LIST_ELTS = 7; FusionCDFFileHeader header = cdf.GetHeader(); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Allocate return list * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ PROTECT(vals = NEW_LIST(LIST_ELTS)); PROTECT(names = NEW_CHARACTER(LIST_ELTS)); /* * Luis should add a version number */ SET_STRING_ELT(names, ii, mkChar("ncols")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = header.GetCols(); SET_VECTOR_ELT(vals, ii++, tmp); UNPROTECT(1); SET_STRING_ELT(names, ii, mkChar("nrows")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = header.GetRows(); SET_VECTOR_ELT(vals, ii++, tmp); UNPROTECT(1); SET_STRING_ELT(names, ii, mkChar("nunits")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = header.GetNumProbeSets(); SET_VECTOR_ELT(vals, ii++, tmp); UNPROTECT(1); SET_STRING_ELT(names, ii, mkChar("nqcunits")); PROTECT(tmp 
= allocVector(INTSXP, 1)); INTEGER(tmp)[0] = header.GetNumQCProbeSets(); SET_VECTOR_ELT(vals, ii++, tmp); UNPROTECT(1); SET_STRING_ELT(names, ii, mkChar("refseq")); str = header.GetReference(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(vals, ii++, mkString(cstr)); Free(cstr); SET_STRING_ELT(names, ii, mkChar("chiptype")); str = cdf.GetChipType(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(vals, ii++, mkString(cstr)); Free(cstr); SET_STRING_ELT(names, ii, mkChar("filename")); str = cdf.GetFileName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(vals, ii++, mkString(cstr)); Free(cstr); /** set the names down here at the end. **/ setAttrib(vals, R_NamesSymbol, names); /** Unprotect the returned list. **/ UNPROTECT(2); /* 'names' and then 'vals' */ return vals; } /* R_affx_get_cdf_file_header() */ /************************************************************************ * * R_affx_get_cdf_file() * * Note: * Currently being updated.... 
* ************************************************************************/ SEXP R_affx_get_cdf_file(SEXP fname, SEXP unitIndices, SEXP verbose, SEXP returnUnitType, SEXP returnUnitDirection, SEXP returnUnitAtomNumbers, SEXP returnUnitNumber, SEXP returnXY, SEXP returnIndices, SEXP returnBases, SEXP returnAtoms, SEXP returnIndexpos, SEXP returnIsPm, SEXP returnBlockDirection, SEXP returnBlockAtomNumbers) { FusionCDFData cdf; string str; int str_length; char* cstr; char p_base, t_base; SEXP r_units_list = R_NilValue, r_units_list_names = R_NilValue, r_unit = R_NilValue, r_unit_names = R_NilValue, r_blocks_list = R_NilValue, r_blocks_list_names = R_NilValue, r_block = R_NilValue, r_block_names = R_NilValue, r_xvals = R_NilValue, r_yvals = R_NilValue, r_indices = R_NilValue, r_pbase = R_NilValue, r_tbase = R_NilValue, r_expos = R_NilValue, r_indexpos = R_NilValue, r_ispm = R_NilValue; /* ** First we read in the cdf files and get the number of units. */ int numUnits = 0; int numCols = 0; int numUnitsInFile = 0; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_verboseFlag = INTEGER(verbose)[0]; int i_returnUnitType = INTEGER(returnUnitType)[0]; int i_returnUnitDirection = INTEGER(returnUnitDirection)[0]; int i_returnUnitAtomNumbers = INTEGER(returnUnitAtomNumbers)[0]; int i_returnUnitNumber = INTEGER(returnUnitAtomNumbers)[0]; int i_returnXY = INTEGER(returnXY)[0]; int i_returnIndices = INTEGER(returnIndices)[0]; int i_returnIsPm = INTEGER(returnIsPm)[0]; int i_returnBases = INTEGER(returnBases)[0]; int i_returnAtoms = INTEGER(returnAtoms)[0]; int i_returnIndexpos = INTEGER(returnIndexpos)[0]; int i_returnBlockDirection = INTEGER(returnBlockDirection)[0]; int i_returnBlockAtomNumbers = INTEGER(returnBlockAtomNumbers)[0]; bool readEveryUnit = true; cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file.\n"); } 
numUnitsInFile = cdf.GetHeader().GetNumProbeSets(); numCols = cdf.GetHeader().GetCols(); if(length(unitIndices) != 0){ for (int i = 0; i < length(unitIndices); i++) { if(INTEGER(unitIndices)[i] < 1 || INTEGER(unitIndices)[i] > numUnitsInFile) error("Argument 'units' contains an element out of range [%d,%d]: %d", 1, numUnitsInFile, INTEGER(unitIndices)[i]); } numUnits = length(unitIndices); readEveryUnit = false; } else { numUnits = numUnitsInFile; readEveryUnit = true; } /* ** Now we set up stuff for the different objects we need to return */ FusionCDFProbeSetInformation unit; FusionCDFProbeGroupInformation block; FusionCDFProbeInformation probe; int unitNumBlocks; int blockNumCells; char *unitName; const char *blockName; string pbaseString, tbaseString; int ii, unit_idx, unprotectBlockInfo, numBlockArguments, numUnitArguments; PROTECT(r_units_list_names = NEW_CHARACTER(numUnits)); PROTECT(r_units_list = NEW_LIST(numUnits)); /* ** First the units themselves */ numUnitArguments = i_returnUnitType + i_returnUnitDirection + 3 * i_returnUnitAtomNumbers + i_returnUnitNumber + 1; PROTECT(r_unit_names = NEW_CHARACTER(numUnitArguments)); ii = 0; SET_STRING_ELT(r_unit_names, ii++, mkChar("groups")); if(i_returnUnitType) SET_STRING_ELT(r_unit_names, ii++, mkChar("unittype")); if(i_returnUnitDirection) SET_STRING_ELT(r_unit_names, ii++, mkChar("unitdirection")); if(i_returnUnitAtomNumbers) { SET_STRING_ELT(r_unit_names, ii++, mkChar("natoms")); SET_STRING_ELT(r_unit_names, ii++, mkChar("ncells")); SET_STRING_ELT(r_unit_names, ii++, mkChar("ncellsperatom")); } if(i_returnUnitNumber) SET_STRING_ELT(r_unit_names, ii++, mkChar("unitnumber")); /* ** Because the length of the block list can change ** from unit to unit, we have to protect these ** things below. */ /* ** Now an individual block. 
*/ numBlockArguments = 2 * i_returnXY + i_returnIndices + 2 * i_returnBases + i_returnAtoms + 2 * i_returnBlockAtomNumbers + i_returnBlockDirection + i_returnIndexpos + i_returnIsPm; unprotectBlockInfo = 0; PROTECT(r_block_names = NEW_CHARACTER(numBlockArguments)); ii = 0; if(i_returnXY) { SET_STRING_ELT(r_block_names, ii++, mkChar("x")); unprotectBlockInfo++; SET_STRING_ELT(r_block_names, ii++, mkChar("y")); unprotectBlockInfo++; } if(i_returnIndices) { SET_STRING_ELT(r_block_names, ii++, mkChar("indices")); unprotectBlockInfo++; } if(i_returnBases) { SET_STRING_ELT(r_block_names, ii++, mkChar("pbase")); unprotectBlockInfo++; SET_STRING_ELT(r_block_names, ii++, mkChar("tbase")); unprotectBlockInfo++; } if(i_returnAtoms) { SET_STRING_ELT(r_block_names, ii++, mkChar("atom")); unprotectBlockInfo++; } if(i_returnIndexpos) { SET_STRING_ELT(r_block_names, ii++, mkChar("indexpos")); unprotectBlockInfo++; } if(i_returnIsPm) { SET_STRING_ELT(r_block_names, ii++, mkChar("ispm")); unprotectBlockInfo++; } if(i_returnBlockDirection) { SET_STRING_ELT(r_block_names, ii++, mkChar("groupdirection")); } if(i_returnBlockAtomNumbers){ SET_STRING_ELT(r_block_names, ii++, mkChar("natoms")); SET_STRING_ELT(r_block_names, ii++, mkChar("ncellsperatom")); } /* ** This needed in order to reduce memory for the Unit/Block type */ SEXP r_typeAsString; PROTECT(r_typeAsString = NEW_STRING(8)); SET_STRING_ELT(r_typeAsString, 0, mkChar("unknown")); SET_STRING_ELT(r_typeAsString, 1, mkChar("expression")); SET_STRING_ELT(r_typeAsString, 2, mkChar("genotyping")); SET_STRING_ELT(r_typeAsString, 3, mkChar("resequencing")); SET_STRING_ELT(r_typeAsString, 4, mkChar("tag")); SET_STRING_ELT(r_typeAsString, 5, mkChar("copynumber")); SET_STRING_ELT(r_typeAsString, 6, mkChar("genotypingcontrol")); SET_STRING_ELT(r_typeAsString, 7, mkChar("expressioncontrol")); SEXP r_directionAsString; PROTECT(r_directionAsString = NEW_STRING(4)); SET_STRING_ELT(r_directionAsString, 0, mkChar("nodirection")); 
SET_STRING_ELT(r_directionAsString, 1, mkChar("sense")); SET_STRING_ELT(r_directionAsString, 2, mkChar("antisense")); SET_STRING_ELT(r_directionAsString, 3, mkChar("unknown")); /* ** Reading in the units. */ if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Finished setup, commencing unit parsing.\n"); } for (int iunit = 0; iunit < numUnits; iunit++) { /* Check for interrupts */ if(iunit % 1000 == 999) R_CheckUserInterrupt(); /* The index, which is 0-based in Fusion unlike our R-api */ if(readEveryUnit) unit_idx = iunit; else unit_idx = INTEGER(unitIndices)[iunit] - 1; cdf.GetProbeSetInformation(unit_idx, unit); PROTECT(r_unit = NEW_LIST(numUnitArguments)); str = cdf.GetProbeSetName(unit_idx); str_length = str.size(); unitName = Calloc(str_length+1, char); strncpy(unitName, str.c_str(), str_length); unitName[str_length] = '\0'; if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Processing unit: %s\n", unitName); } /* There is no direct GetName method for a unit - strange */ unitNumBlocks = unit.GetNumGroups(); ii = 1; /* we always return the blocks */ if(i_returnUnitType) { switch (unit.GetProbeSetType()) { case affxcdf::UnknownProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 0))); break; case affxcdf::ExpressionProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 1))); break; case affxcdf::GenotypingProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 2))); break; case affxcdf::ResequencingProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 3))); break; case affxcdf::TagProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 4))); break; case affxcdf::CopyNumberProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 5))); break; case affxcdf::GenotypeControlProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 6))); break; case 
affxcdf::ExpressionControlProbeSetType: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 7))); break; default: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_typeAsString, 0))); break; } } if(i_returnUnitDirection) { switch (unit.GetDirection()) { case affxcdf::NoDirection: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_directionAsString, 0))); break; case affxcdf::SenseDirection: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_directionAsString, 1))); break; case affxcdf::AntiSenseDirection: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_directionAsString, 2))); break; default: SET_VECTOR_ELT(r_unit, ii++, ScalarString(STRING_ELT(r_directionAsString, 3))); break; } } if(i_returnUnitAtomNumbers) { SET_VECTOR_ELT(r_unit, ii++, ScalarInteger(unit.GetNumLists())); SET_VECTOR_ELT(r_unit, ii++, ScalarInteger(unit.GetNumCells())); SET_VECTOR_ELT(r_unit, ii++, ScalarInteger(unit.GetNumCellsPerList())); } if(i_returnUnitNumber) SET_VECTOR_ELT(r_unit, ii++, ScalarInteger(unit.GetProbeSetNumber())); PROTECT(r_blocks_list = NEW_LIST(unitNumBlocks)); PROTECT(r_blocks_list_names = NEW_CHARACTER(unitNumBlocks)); /* ** Now for the blocks */ for (int iblock = 0; iblock < unitNumBlocks; iblock++) { unit.GetGroupInformation(iblock, block); PROTECT(r_block = NEW_LIST(numBlockArguments)); blockNumCells = block.GetNumCells(); blockName = block.GetName().c_str(); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Processing block %s\n", blockName); } if(i_returnXY) { PROTECT(r_xvals = NEW_INTEGER(blockNumCells)); PROTECT(r_yvals = NEW_INTEGER(blockNumCells)); } if(i_returnIndices) PROTECT(r_indices = NEW_INTEGER(blockNumCells)); if(i_returnBases) { PROTECT(r_pbase = NEW_CHARACTER(blockNumCells)); PROTECT(r_tbase = NEW_CHARACTER(blockNumCells)); } if(i_returnAtoms) PROTECT(r_expos = NEW_INTEGER(blockNumCells)); if(i_returnIndexpos) PROTECT(r_indexpos = NEW_INTEGER(blockNumCells)); if(i_returnIsPm) PROTECT(r_ispm = 
NEW_LOGICAL(blockNumCells)); for (int icell = 0; icell < blockNumCells; icell++) { block.GetCell(icell, probe); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("icell: %d x: %d, y: %d, pbase: %c, tbase: %c, expos: %d, indexpos: %d\n", icell, probe.GetX(), probe.GetY(), probe.GetPBase(), probe.GetTBase(), probe.GetExpos(), probe.GetListIndex()); } if(i_returnXY) { INTEGER(r_xvals)[icell] = probe.GetX(); INTEGER(r_yvals)[icell] = probe.GetY(); } if(i_returnIndices) { INTEGER(r_indices)[icell] = probe.GetY() * numCols + probe.GetX() + 1; } if(i_returnBases) { pbaseString = probe.GetPBase(); SET_STRING_ELT(r_pbase, icell, mkChar(pbaseString.c_str())); tbaseString = probe.GetTBase(); SET_STRING_ELT(r_tbase, icell, mkChar(tbaseString.c_str())); } if(i_returnAtoms) INTEGER(r_expos)[icell] = probe.GetExpos(); if(i_returnIndexpos) INTEGER(r_indexpos)[icell] = probe.GetListIndex(); if(i_returnIsPm) { p_base = probe.GetPBase(); t_base = probe.GetTBase(); LOGICAL(r_ispm)[icell] = R_affx_pt_base_is_pm(p_base, t_base); } } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("finished reading unit information for: %s\n", unitName); } /** Write everything into the r_block object, ** finalize it and unprotect **/ ii = 0; if(i_returnXY) { SET_VECTOR_ELT(r_block, ii++, r_xvals); SET_VECTOR_ELT(r_block, ii++, r_yvals); } if(i_returnIndices) SET_VECTOR_ELT(r_block, ii++, r_indices); if(i_returnBases) { SET_VECTOR_ELT(r_block, ii++, r_pbase); SET_VECTOR_ELT(r_block, ii++, r_tbase); } if(i_returnAtoms) SET_VECTOR_ELT(r_block, ii++, r_expos); if(i_returnIndexpos) SET_VECTOR_ELT(r_block, ii++, r_indexpos); if(i_returnIsPm) SET_VECTOR_ELT(r_block, ii++, r_ispm); if(i_returnBlockDirection) { switch (block.GetDirection()) { case affxcdf::NoDirection: SET_VECTOR_ELT(r_block, ii++, ScalarString(STRING_ELT(r_directionAsString, 0))); break; case affxcdf::SenseDirection: SET_VECTOR_ELT(r_block, ii++, ScalarString(STRING_ELT(r_directionAsString, 1))); break; case affxcdf::AntiSenseDirection: 
SET_VECTOR_ELT(r_block, ii++, ScalarString(STRING_ELT(r_directionAsString, 2))); break; default: SET_VECTOR_ELT(r_block, ii++, ScalarString(STRING_ELT(r_directionAsString, 3))); break; } } if(i_returnBlockAtomNumbers) { SET_VECTOR_ELT(r_block, ii++, ScalarInteger(block.GetNumLists())); SET_VECTOR_ELT(r_block, ii++, ScalarInteger(block.GetNumCellsPerList())); } setAttrib(r_block, R_NamesSymbol, r_block_names); UNPROTECT(unprotectBlockInfo); /** Put the block into the r_blocks_list and unprotect it **/ str = block.GetName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(r_blocks_list, iblock, r_block); SET_STRING_ELT(r_blocks_list_names, iblock, mkChar(cstr)); Free(cstr); UNPROTECT(1); } /** set the r_block_list names. **/ setAttrib(r_blocks_list, R_NamesSymbol, r_blocks_list_names); /** Finalize the unit object, and unprotect ** the r_blocks_list and r_blocks_list_names **/ SET_VECTOR_ELT(r_unit, 0, r_blocks_list); setAttrib(r_unit, R_NamesSymbol, r_unit_names); UNPROTECT(2); /** now set the unit in the main units_list, ** and unprotect it. **/ SET_VECTOR_ELT(r_units_list, iunit, r_unit); SET_STRING_ELT(r_units_list_names, iunit, mkChar(unitName)); Free(unitName); UNPROTECT(1); } /* The type and direction vectors */ UNPROTECT(2); /** set the names down here at the end. **/ setAttrib(r_units_list, R_NamesSymbol, r_units_list_names); /* unprotect everything we started with, that is ** r_units_list, r_units_list_names as well as ** r_unit_names, r_block_names **/ UNPROTECT(4); return r_units_list; } /* R_affx_get_cdf_file() */ /************************************************************************ * * R_affx_get_cdf_cell_indices() * * Description: * This function returns a names R list structure where each element * corresponds to one unit. 
The CDF header is not returned, because * then it would be hard to differentiate that element from a unit; the * number of list elements should equal the number of units read. * ************************************************************************/ SEXP R_affx_get_cdf_cell_indices(SEXP fname, SEXP units, SEXP verbose) { FusionCDFData cdf; string str; int str_length; char* cstr; SEXP /* Returned list of units */ resUnits = R_NilValue, unitNames = R_NilValue, /* */ /** one might already want to standardize on this naming scheme... **/ r_probe_set = R_NilValue, r_probe_set_names = R_NilValue, /* */ r_group_list = R_NilValue, r_group_names = R_NilValue, /* Group fields */ indices = R_NilValue, cell_list = R_NilValue, cell_list_names = R_NilValue; bool readAll = true; int maxNbrOfUnits = 0, nbrOfUnits = 0, unitIdx = 0; int ncol = 0; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Process arguments * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_verboseFlag = INTEGER(verbose)[0]; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens file * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Get unit indices to be read * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ FusionCDFFileHeader header = cdf.GetHeader(); maxNbrOfUnits = header.GetNumProbeSets(); nbrOfUnits = length(units); if (nbrOfUnits == 0) { nbrOfUnits = maxNbrOfUnits; } else { readAll = false; /* Validate argument 'units': */ for (int uu = 0; uu < nbrOfUnits; uu++) { unitIdx = INTEGER(units)[uu]; /* Unit indices are zero-based in Fusion SDK. 
*/ if (unitIdx < 1 || unitIdx > maxNbrOfUnits) { char s[256]; sprintf(s, "Argument 'units' contains an element out of range: %d", unitIdx); error(s); } } } ncol = header.GetCols(); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Allocate 'resUnits' list * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ PROTECT(resUnits = NEW_LIST(nbrOfUnits)); PROTECT(unitNames = NEW_CHARACTER(nbrOfUnits)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Create field names for all groups in all units * * Since all groups in all units have fields with identical names, * we can allocated the field names ones and let all groups share * the same name vector. This will save memory (appox 232 bytes * per group or 13-14Mb for a 100K SNP chip) and speed up things * about 10-20%. * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ PROTECT(cell_list_names = NEW_STRING(1)); SET_STRING_ELT(cell_list_names, 0, mkChar("indices")); /* Same for all unit elements */ PROTECT(r_probe_set_names = NEW_STRING(1)); SET_STRING_ELT(r_probe_set_names, 0, mkChar("groups")); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each unit * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ FusionCDFProbeSetInformation probeset; for (int uu = 0; uu < nbrOfUnits; uu++) { /* Make it possible to interrupt */ if(uu % 1000 == 999) R_CheckUserInterrupt(); if (readAll) { unitIdx = uu; } else { /* Unit indices are zero-based in Fusion SDK. */ unitIdx = INTEGER(units)[uu] - 1; } cdf.GetProbeSetInformation(unitIdx, probeset); /* get the name */ /* 'name' is a pointer to a const char: */ str = cdf.GetProbeSetName(unitIdx); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; /** ...and add to list of unit names. 
**/ SET_STRING_ELT(unitNames, uu, mkChar(cstr)); Free(cstr); PROTECT(r_probe_set = NEW_LIST(1)); int ngroups = probeset.GetNumGroups(); PROTECT(r_group_list = NEW_LIST(ngroups)); PROTECT(r_group_names = NEW_CHARACTER(ngroups)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each group in the current unit * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int igroup = 0; igroup < ngroups; igroup++) { FusionCDFProbeGroupInformation group; probeset.GetGroupInformation(igroup, group); int ncells = group.GetNumCells(); PROTECT(cell_list = NEW_LIST(1)); PROTECT(indices = NEW_INTEGER(ncells)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each cell in the current group... * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int icell = 0; icell < ncells; icell++) { FusionCDFProbeInformation probe; group.GetCell(icell, probe); int x = probe.GetX(); int y = probe.GetY(); /* Cell indices are one-based in R. */ INTEGER(indices)[icell] = y*ncol + x + 1; } /* for (int icell ...) */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Assign field values * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /** do I have to make the attribute vector everytime? **/ SET_VECTOR_ELT(cell_list, 0, indices); /** set the names of the new list, dont really know if I need to do this each and every time. **/ setAttrib(cell_list, R_NamesSymbol, cell_list_names); /** set these cells in the group list. **/ SET_VECTOR_ELT(r_group_list, igroup, cell_list); str = group.GetName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(r_group_names, igroup, mkChar(cstr)); Free(cstr); /* Unprotect in reverse order */ UNPROTECT(2); /* 'indices' and then 'cell_list' */ } /* for (int igroup ...) */ /** set the group names. 
**/ setAttrib(r_group_list, R_NamesSymbol, r_group_names); /** add groups to current unit. **/ SET_VECTOR_ELT(r_probe_set, 0, r_group_list); /** add current unit to list of all units. **/ setAttrib(r_probe_set, R_NamesSymbol, r_probe_set_names); SET_VECTOR_ELT(resUnits, uu, r_probe_set); /* 'r_group_names' and then 'r_group_list' and 'r_probe_set' */ UNPROTECT(3); } /* for (int uu...) */ UNPROTECT(2); /* 'r_probe_set_names' and then 'cell_list_names' */ /** set all unit names. **/ setAttrib(resUnits, R_NamesSymbol, unitNames); /** unprotect return list. **/ UNPROTECT(2); /* 'unitNames' and then 'resUnits' */ return resUnits; } /* R_affx_get_cdf_cell_indices() */ /************************************************************************ * * R_affx_get_cdf_units() * * Description: * This function returns a names R list structure where each element * corresponds to one unit. The CDF header is not returned, because * then it would be hard to differentiate that element from a unit; the * number of list elements should equal the number of units read. * * Note: * This function does what R_affx_get_cdf_file() does and more. The * plan is to remove the latter. * ************************************************************************/ SEXP R_affx_get_cdf_units(SEXP fname, SEXP units, SEXP readXY, SEXP readBases, SEXP readExpos, SEXP readType, SEXP readDirection, SEXP readIndices, SEXP verbose) { FusionCDFData cdf; string str; int str_length; char* cstr; SEXP /* Returned list of units */ resUnits = R_NilValue, unitNames = R_NilValue, /* */ /** one might already want to standardize on this naming scheme... 
**/ r_probe_set = R_NilValue, r_probe_set_names = R_NilValue, /* */ r_group_list = R_NilValue, r_group_names = R_NilValue, /* Group fields */ xvals = R_NilValue, yvals = R_NilValue, indices = R_NilValue, pbase = R_NilValue, tbase = R_NilValue, expos = R_NilValue, cell_list = R_NilValue, cell_list_names = R_NilValue, tmp = R_NilValue; bool readAll = true; int maxNbrOfUnits = 0, nbrOfUnits = 0, unitIdx = 0; int ncol = 0; /** XXX: I am not sure this is the most elegant way to handle these in R. I initialize it hear for kicks. **/ char p_base[2] = "X"; char t_base[2] = "X"; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Process arguments * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_readXY = INTEGER(readXY)[0]; int i_readBases = INTEGER(readBases)[0]; int i_readExpos = INTEGER(readExpos)[0]; int i_readType = INTEGER(readType)[0]; int i_readDirection = INTEGER(readDirection)[0]; int i_readIndices = INTEGER(readIndices)[0]; int i_verboseFlag = INTEGER(verbose)[0]; int i_readGroups = i_readXY || i_readBases || i_readExpos || i_readIndices; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * A special case? 
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (i_readIndices && !i_readXY && !i_readBases && !i_readExpos && !i_readType && !i_readDirection) { return R_affx_get_cdf_cell_indices(fname, units, verbose); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens file * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Get unit indices to be read * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ FusionCDFFileHeader header = cdf.GetHeader(); maxNbrOfUnits = header.GetNumProbeSets(); nbrOfUnits = length(units); if (nbrOfUnits == 0) { nbrOfUnits = maxNbrOfUnits; } else { readAll = false; /* Validate argument 'units': */ for (int uu = 0; uu < nbrOfUnits; uu++) { unitIdx = INTEGER(units)[uu]; /* Unit indices are zero-based in Fusion SDK. 
*/ if (unitIdx < 1 || unitIdx > maxNbrOfUnits) { char s[256]; sprintf(s, "Argument 'units' contains an element out of range: %d", unitIdx); error(s); } } } if (i_readIndices) { ncol = header.GetCols(); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Allocate 'resUnits' list * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ PROTECT(resUnits = NEW_LIST(nbrOfUnits)); PROTECT(unitNames = NEW_CHARACTER(nbrOfUnits)); int nbrOfUnitElements = i_readGroups + i_readType + i_readDirection; int nbrOfGroupElements = 2*i_readXY + 2*i_readBases + i_readExpos + i_readIndices + i_readDirection; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Create field names for all groups in all units * * Since all groups in all units have fields with identical names, * we can allocated the field names ones and let all groups share * the same name vector. This will save memory (appox 232 bytes * per group or 13-14Mb for a 100K SNP chip) and speed up things * about 10-20%. 
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ int fieldIdx = 0; if (i_readGroups) { PROTECT(cell_list_names = NEW_STRING(nbrOfGroupElements)); if (i_readXY) { SET_STRING_ELT(cell_list_names, fieldIdx++, mkChar("x")); SET_STRING_ELT(cell_list_names, fieldIdx++, mkChar("y")); } if (i_readIndices) { SET_STRING_ELT(cell_list_names, fieldIdx++, mkChar("indices")); } if (i_readBases) { SET_STRING_ELT(cell_list_names, fieldIdx++, mkChar("pbase")); SET_STRING_ELT(cell_list_names, fieldIdx++, mkChar("tbase")); } if (i_readExpos) { SET_STRING_ELT(cell_list_names, fieldIdx++, mkChar("expos")); } if (i_readDirection) { SET_STRING_ELT(cell_list_names, fieldIdx++, mkChar("direction")); } } /* Same for all unit elements */ PROTECT(r_probe_set_names = NEW_STRING(nbrOfUnitElements)); int rpsi = 0; if (i_readType) { /* get the type */ SET_STRING_ELT(r_probe_set_names, rpsi++, mkChar("type")); } if (i_readDirection) { /* get the direction */ SET_STRING_ELT(r_probe_set_names, rpsi++, mkChar("direction")); } if (i_readGroups) { SET_STRING_ELT(r_probe_set_names, rpsi++, mkChar("groups")); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each unit * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ FusionCDFProbeSetInformation probeset; for (int uu = 0; uu < nbrOfUnits; uu++) { /* Make it possible to interrupt */ if(uu % 1000 == 999) R_CheckUserInterrupt(); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Unit %d/%d...", uu+1, nbrOfUnits); } else if (i_verboseFlag >= R_AFFX_VERBOSE) { if (uu % 1000 == 0 || uu == nbrOfUnits-1 || i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("%d/%d, ", uu+1, nbrOfUnits); } } if (readAll) { unitIdx = uu; } else { /* Unit indices are zero-based in Fusion SDK. 
*/ unitIdx = INTEGER(units)[uu] - 1; } if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Unit: %d", unitIdx); } cdf.GetProbeSetInformation(unitIdx, probeset); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf(", "); } /* get the name */ str = cdf.GetProbeSetName(unitIdx); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; /** ...and add to list of unit names. **/ SET_STRING_ELT(unitNames, uu, mkChar(cstr)); Free(cstr); PROTECT(r_probe_set = NEW_LIST(nbrOfUnitElements)); rpsi = 0; if (i_readType) { /* get the type */ PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = probeset.GetProbeSetType(); SET_VECTOR_ELT(r_probe_set, rpsi++, tmp); UNPROTECT(1); } if (i_readDirection) { /* get the direction */ PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = probeset.GetDirection(); SET_VECTOR_ELT(r_probe_set, rpsi++, tmp); UNPROTECT(1); } if (i_readGroups) { int ngroups = probeset.GetNumGroups(); PROTECT(r_group_list = NEW_LIST(ngroups)); PROTECT(r_group_names = NEW_CHARACTER(ngroups)); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each group in the current unit * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int igroup = 0; igroup < ngroups; igroup++) { /* if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Group %d/%d...\n", igroup+1, ngroups); } */ FusionCDFProbeGroupInformation group; probeset.GetGroupInformation(igroup, group); int ncells = group.GetNumCells(); PROTECT(cell_list = NEW_LIST(nbrOfGroupElements)); int protectCount = 0; if (i_readXY) { PROTECT(xvals = NEW_INTEGER(ncells)); protectCount++; PROTECT(yvals = NEW_INTEGER(ncells)); protectCount++; } if (i_readIndices) { PROTECT(indices = NEW_INTEGER(ncells)); protectCount++; } if (i_readBases) { PROTECT(pbase = NEW_STRING(ncells)); protectCount++; PROTECT(tbase = NEW_STRING(ncells)); protectCount++; } if (i_readExpos) { PROTECT(expos = NEW_INTEGER(ncells)); 
protectCount++; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each cell in the current group... * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int icell = 0; icell < ncells; icell++) { FusionCDFProbeInformation probe; group.GetCell(icell, probe); if (i_readXY || i_readIndices) { int x = probe.GetX(); int y = probe.GetY(); if (i_readXY) { INTEGER(xvals)[icell] = x; INTEGER(yvals)[icell] = y; } if (i_readIndices) { /* Cell indices are one-based in R. */ INTEGER(indices)[icell] = y*ncol + x + 1; } } if (i_readBases) { p_base[0] = probe.GetPBase(); t_base[0] = probe.GetTBase(); SET_STRING_ELT(pbase, icell, mkChar(p_base)); SET_STRING_ELT(tbase, icell, mkChar(t_base)); } if (i_readExpos) { INTEGER(expos)[icell] = probe.GetExpos(); } if (i_verboseFlag > R_AFFX_REALLY_VERBOSE) { Rprintf("cell: %2d, (x,y)=(%4d,%4d), (pbase,tbase)=(%c,%c), expos: %2d\n", icell, probe.GetX(), probe.GetY(), probe.GetPBase(), probe.GetTBase(), probe.GetExpos()); } } /* for (int icell ...) */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Assign field values * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ fieldIdx = 0; /** do I have to make the attribute vector everytime? **/ if (i_readXY) { SET_VECTOR_ELT(cell_list, fieldIdx++, xvals); SET_VECTOR_ELT(cell_list, fieldIdx++, yvals); } if (i_readIndices) { SET_VECTOR_ELT(cell_list, fieldIdx++, indices); } if (i_readBases) { SET_VECTOR_ELT(cell_list, fieldIdx++, pbase); SET_VECTOR_ELT(cell_list, fieldIdx++, tbase); } if (i_readExpos) { SET_VECTOR_ELT(cell_list, fieldIdx++, expos); } if (i_readDirection) { PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = group.GetDirection(); UNPROTECT(1); SET_VECTOR_ELT(cell_list, fieldIdx++, tmp); } /* Unprotect in reverse order, e.g. 'expos', ..., 'xvals' */ UNPROTECT(protectCount); /** set the names of the new list, dont really know if I need to do this each and every time. 
**/ setAttrib(cell_list, R_NamesSymbol, cell_list_names); /** set these cells in the group list. **/ SET_VECTOR_ELT(r_group_list, igroup, cell_list); str = group.GetName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(r_group_names, igroup, mkChar(cstr)); Free(cstr); UNPROTECT(1); /* 'cell_list' */ /* if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Group %d/%d...done\n", igroup+1, ngroups); } */ } /* for (int igroup ...) */ /** set the group names. **/ setAttrib(r_group_list, R_NamesSymbol, r_group_names); /** add groups to current unit. **/ SET_VECTOR_ELT(r_probe_set, rpsi, r_group_list); UNPROTECT(2); /* 'r_group_names' and then 'r_group_list' */ } /* if (i_readGroups) */ /** add current unit to list of all units. **/ setAttrib(r_probe_set, R_NamesSymbol, r_probe_set_names); SET_VECTOR_ELT(resUnits, uu, r_probe_set); UNPROTECT(1); /* 'r_probe_set' */ if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("Unit %d/%d...done\n", uu+1, nbrOfUnits); } } /* for (int uu...) */ UNPROTECT(1); /* 'r_probe_set_names' */ if (i_readGroups) { UNPROTECT(1); /* 'cell_list_names' */ } /** set all unit names. **/ setAttrib(resUnits, R_NamesSymbol, unitNames); if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("R_affx_get_cdf_units()...done\n"); } /** unprotect return list. 
**/ UNPROTECT(2); /* 'unitNames' and then 'resUnits' */ return resUnits; } /* R_affx_get_cdf_units() */ /************************************************************************ * * R_affx_get_cdf_unit_names() * ************************************************************************/ SEXP R_affx_get_cdf_unit_names(SEXP fname, SEXP units, SEXP verbose) { FusionCDFData cdf; string str; int str_length; char* cstr; SEXP names = R_NilValue; bool readAll = true; int maxNbrOfUnits = 0, nbrOfUnits = 0; int unitIdx = 0; const char* cdfFileName = CHAR(STRING_ELT(fname, 0)); int i_verboseFlag = INTEGER(verbose)[0]; cdf.SetFileName(cdfFileName); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Attempting to read CDF File: %s\n", cdf.GetFileName().c_str()); } if (cdf.Read() == false) { error("Failed to read the CDF file."); } FusionCDFFileHeader header = cdf.GetHeader(); maxNbrOfUnits = header.GetNumProbeSets(); nbrOfUnits = length(units); if (nbrOfUnits == 0) { nbrOfUnits = maxNbrOfUnits; } else { readAll = false; /* Validate argument 'units': */ for (int uu = 0; uu < nbrOfUnits; uu++) { unitIdx = INTEGER(units)[uu]; /* Unit indices are zero-based in Fusion SDK. */ if (unitIdx < 1 || unitIdx > maxNbrOfUnits) { char s[256]; sprintf(s, "Argument 'units' contains an element out of range: %d", unitIdx); error(s); } } } /** the probe set names. **/ PROTECT(names = NEW_CHARACTER(nbrOfUnits)); if (readAll) { for (int uu = 0; uu < nbrOfUnits; uu++) { str = cdf.GetProbeSetName(uu); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, uu, mkChar(cstr)); Free(cstr); } } else { for (int uu = 0; uu < nbrOfUnits; uu++) { /* Unit indices are zero-based in Fusion SDK. 
*/ unitIdx = INTEGER(units)[uu] - 1; str = cdf.GetProbeSetName(unitIdx); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, uu, mkChar(cstr)); Free(cstr); } } /** unprotect the return vector. **/ UNPROTECT(1); /* 'names' */ return names; } /* R_affx_get_cdf_unit_names() */ } /** end extern C **/ /*************************************************************************** * HISTORY: * 2014-10-28 * o BUG FIX: Argument 'unitIndices' to R_affx_get_cdf_file_qc() and R_affx_get_cdf_file() could contain elements out of range [1,J]. If zero or a negative unit was queried a core dump could occur. If a too large unit was queried, random garbage was read. The intended argument check was never performed due to a typo swapping 'condition' and 'increase' in a for (initialization; condition; increase) loop. * 2008-08-09 * o Now unit type 5 is reported as 'copynumber' and no longer as 'unknown'. * 2007-01-05 * o BUG FIX: The new group 'direction' field was added first making all * fields getting names of other fields. * 2006-12-30 * o Added group directions to R_affx_get_cdf_units() too. That is the * most important group element missing. /HB * 2006-08-28 * o If a unit index is out of range, the error now show the invalid index. * 2006-04-02 * o Added R_affx_get_cdf_cell_indices() for faster reading of cell indices. * 2006-04-01 * o For R_affx_get_cdf_units() the field names in all groups and units are * pointing to the same vector of names in memory. This save about * 13-15Mb and speeds up the reading. I think/hope this is valid to do! * 2006-03-28 * o Unit indices and cell indices are now one-based. /HB * o Renamed argument 'readCells' to 'readIndices' and returned field * 'cells' to 'indices'. * 2006-03-26 * o Note: PROTECT()/UNPROTECT() is a LIFO stack and not a FIFO queue! * o Added more verbose output. Trying to find where R crashes. 
There * seems to be some kind of memory leak, but I cannot pinpoint it. /HB * o Added PROTECT() to all allocVector() allocated variables in * R_affx_get_cdf_units(). * 2006-02-21 * o Added argument 'readCells' to R_affx_get_cdf_units(). /HB * 2006-01-15 * o It is now possible to specify what readCdfUnits() should return. /HB * 2006-01-12 * o BUG FIX: The check of the upper-limit of unit indicies was done * assuming one-based indices. /HB * 2006-01-10 * o Updated the "units" code to be more similar to the corresponding code * for CEL files. /HB * o Added a return value to non-void function R_affx_get_cdf_file_qc(). /HB * 2006-01-09 * o Added R_affx_get_cdf_units() and R_affx_get_cdf_unit.names(). /HB * o Created. The purpose was to make it possible to read subsets of units * and not just all units at once. /HB **************************************************************************/ affxparser/src/R_affx_cel_parser.cpp0000644000175200017520000004576514516003651020656 0ustar00biocbuildbiocbuild#include "FusionCELData.h" #include #include "R_affx_constants.h" using namespace std; using namespace affymetrix_fusion_io; #include #include #include #include extern "C" { /************************************************************************ * * R_affx_extract_cel_file_meta() * * return a list of the meta data associated with this cell * file. This is the information stored in the header. 
* ************************************************************************/ SEXP R_affx_extract_cel_file_meta(FusionCELData &cel) { SEXP names, vals; SEXP tmp; int kk = 0; string str; int str_length; char* cstr; PROTECT(names = NEW_CHARACTER(14)); PROTECT(vals = NEW_LIST(14)); SET_STRING_ELT(names, kk, mkChar("filename")); str = cel.GetFileName(); str_length = str.size(); cstr = Calloc(str_length+1, char); strncpy(cstr, str.c_str(), str_length); cstr[str_length] = '\0'; SET_VECTOR_ELT(vals, kk++, mkString(cstr)); Free(cstr); SET_STRING_ELT(names, kk, mkChar("version")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = cel.GetVersion(); SET_VECTOR_ELT(vals, kk++, tmp); UNPROTECT(1); SET_STRING_ELT(names, kk, mkChar("cols")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = cel.GetCols(); SET_VECTOR_ELT(vals, kk++, tmp); UNPROTECT(1); SET_STRING_ELT(names, kk, mkChar("rows")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = cel.GetRows(); SET_VECTOR_ELT(vals, kk++, tmp); UNPROTECT(1); SET_STRING_ELT(names, kk, mkChar("total")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = cel.GetNumCells(); SET_VECTOR_ELT(vals, kk++, tmp); UNPROTECT(1); #ifdef SUPPORT_MBCS str_length = cel.GetAlg().size(); cstr = Calloc(str_length+1, char); wcstombs(cstr, cel.GetAlg().c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, kk, mkChar("algorithm")); SET_VECTOR_ELT(vals, kk++, mkString(cstr)); Free(cstr); str_length = cel.GetParams().size(); cstr = Calloc(str_length+1, char); wcstombs(cstr, cel.GetParams().c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, kk, mkChar("parameters")); SET_VECTOR_ELT(vals, kk++, mkString(cstr)); Free(cstr); str_length = cel.GetChipType().size(); cstr = Calloc(str_length+1, char); wcstombs(cstr, cel.GetChipType().c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, kk, mkChar("chiptype")); SET_VECTOR_ELT(vals, kk++, mkString(cstr)); Free(cstr); str_length = 
cel.GetHeader().size(); cstr = Calloc(str_length+1, char); wcstombs(cstr, cel.GetHeader().c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, kk, mkChar("header")); SET_VECTOR_ELT(vals, kk++, mkString(cstr)); Free(cstr); str_length = cel.GetDatHeader().size(); cstr = Calloc(str_length+1, char); wcstombs(cstr, cel.GetDatHeader().c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, kk, mkChar("datheader")); SET_VECTOR_ELT(vals, kk++, mkString(cstr)); Free(cstr); str_length = cel.GetLibraryPackageName().size(); cstr = Calloc(str_length+1, char); wcstombs(cstr, cel.GetLibraryPackageName().c_str(), str_length); cstr[str_length] = '\0'; SET_STRING_ELT(names, kk, mkChar("librarypackage")); SET_VECTOR_ELT(vals, kk++, mkString(cstr)); Free(cstr); #else SET_STRING_ELT(names, kk, mkChar("algorithm")); SET_VECTOR_ELT(vals, kk++, R_NilValue); SET_STRING_ELT(names, kk, mkChar("parameters")); SET_VECTOR_ELT(vals, kk++, R_NilValue); SET_STRING_ELT(names, kk, mkChar("chiptype")); SET_VECTOR_ELT(vals, kk++, R_NilValue); SET_STRING_ELT(names, kk, mkChar("header")); SET_VECTOR_ELT(vals, kk++, R_NilValue); SET_STRING_ELT(names, kk, mkChar("datheader")); SET_VECTOR_ELT(vals, kk++, R_NilValue); SET_STRING_ELT(names, kk, mkChar("librarypackage")); SET_VECTOR_ELT(vals, kk++, R_NilValue); #endif SET_STRING_ELT(names, kk, mkChar("cellmargin")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = cel.GetCellMargin(); SET_VECTOR_ELT(vals, kk++, tmp); UNPROTECT(1); SET_STRING_ELT(names, kk, mkChar("noutliers")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = cel.GetNumOutliers(); SET_VECTOR_ELT(vals, kk++, tmp); UNPROTECT(1); SET_STRING_ELT(names, kk, mkChar("nmasked")); PROTECT(tmp = allocVector(INTSXP, 1)); INTEGER(tmp)[0] = cel.GetNumMasked(); SET_VECTOR_ELT(vals, kk++, tmp); UNPROTECT(1); setAttrib(vals, R_NamesSymbol, names); UNPROTECT(2); return vals; } /************************************************************************ * * 
R_affx_get_cel_file_header() * * This quickly reads only the cel file header. * ************************************************************************/ SEXP R_affx_get_cel_file_header(SEXP fname) { FusionCELData cel; SEXP header = R_NilValue; const char* celFileName = CHAR(STRING_ELT(fname,0)); cel.SetFileName(celFileName); // if (cel.ReadHeader() == false) { if (cel.Exists() == false) { error("Cannot read CEL file header. File not found: %s\n", celFileName); } cel.Read(); try { PROTECT(header = R_affx_extract_cel_file_meta(cel)); UNPROTECT(1); } catch(affymetrix_calvin_exceptions::CalvinException& ex) { error("[affxparser Fusion SDK exception] Failed to parse header of CEL file: %s\n", celFileName); } return header; } /************************************************************************ * * R_affx_get_cel_file() * * read cel file either partially or completely. * ************************************************************************/ SEXP R_affx_get_cel_file(SEXP fname, SEXP readHeader, SEXP readIntensities, SEXP readX, SEXP readY, SEXP readPixels, SEXP readStdvs, SEXP readOutliers, SEXP readMasked, SEXP indices, SEXP verbose) { FusionCELData cel; SEXP header = R_NilValue, xvals = R_NilValue, yvals = R_NilValue, intensities = R_NilValue, stdvs = R_NilValue, pixels = R_NilValue, outliers = R_NilValue, masked = R_NilValue; unsigned int nbrOfOutliers = 0, nbrOfMasked = 0; unsigned int outliersCount = 0, maskedCount = 0; int protectCount = 0; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Process arguments * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ const char* celFileName = CHAR(STRING_ELT(fname,0)); int i_readHeader = INTEGER(readHeader)[0]; int i_readX = INTEGER(readX)[0]; int i_readY = INTEGER(readY)[0]; int i_readIntensities = INTEGER(readIntensities)[0]; int i_readStdvs = INTEGER(readStdvs)[0]; int i_readPixels = INTEGER(readPixels)[0]; int i_readOutliers = INTEGER(readOutliers)[0]; int 
i_readMasked = INTEGER(readMasked)[0]; int i_verboseFlag = INTEGER(verbose)[0]; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Opens file * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("attempting to read: %s\n", celFileName); } /** XXX: there are three Read methods - I do not know which one is the most appropriate here, but this default method seems to read everything. Ex(celFileName, FusionCELData::CEL_ALL) **/ cel.SetFileName(celFileName); if (cel.Exists() == false) { error("Cannot read CEL file. File not found: %s\n", celFileName); } if (cel.Read(true) == false) { error("Cannot read CEL file: %s\n", celFileName); } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("sucessfully read: %s\n", celFileName); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Get cell indices to be read * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ bool readAll = true; int maxNbrOfCells; int nbrOfCells = length(indices); try { maxNbrOfCells = cel.GetNumCells(); } catch(affymetrix_calvin_exceptions::CalvinException& ex) { UNPROTECT(protectCount); error("[affxparser Fusion SDK exception] Failed to parse CEL file: %s\n", celFileName); } if (nbrOfCells == 0) { nbrOfCells = maxNbrOfCells; } else { readAll = false; /* Validate argument 'indices': */ for (int ii = 0; ii < nbrOfCells; ii++) { int index = INTEGER(indices)[ii]; /* Cell indices are zero-based in Fusion SDK. 
*/ if (index < 1 || index > maxNbrOfCells) { error("Argument 'indices' contains an element out of range."); } } nbrOfCells = length(indices); } if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Reading %d cells.\n", nbrOfCells); } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Read header (optional) * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (i_readHeader != 0) { try { PROTECT(header = R_affx_extract_cel_file_meta(cel)); protectCount++; } catch(affymetrix_calvin_exceptions::CalvinException& ex) { error("[affxparser Fusion SDK exception] Failed to parse header of CEL file: %s\n", celFileName); } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * Allocate memory for each vector to be returned. * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Read X and Y (optional) */ if (i_readX != 0) { PROTECT(xvals = NEW_INTEGER(nbrOfCells)); protectCount++; } if (i_readY != 0) { PROTECT(yvals = NEW_INTEGER(nbrOfCells)); protectCount++; } /* Read intensities (optional) */ if (i_readIntensities != 0) { PROTECT(intensities = NEW_NUMERIC(nbrOfCells)); protectCount++; } /* Read standard deviations (optional) */ if (i_readStdvs != 0) { PROTECT(stdvs = NEW_NUMERIC(nbrOfCells)); protectCount++; } /* Read number of pixels (optional) */ if (i_readPixels != 0) { PROTECT(pixels = NEW_INTEGER(nbrOfCells)); protectCount++; } /* Read outlier features (optional) */ if (i_readOutliers != 0) { try { nbrOfOutliers = cel.GetNumOutliers(); } catch(affymetrix_calvin_exceptions::CalvinException& ex) { UNPROTECT(protectCount); error("[affxparser Fusion SDK exception] Failed to parse CEL file: %s\n", celFileName); } if (i_verboseFlag >= R_AFFX_VERBOSE) Rprintf("Number of outliers to be read: %d\n", nbrOfOutliers); PROTECT(outliers = NEW_INTEGER(nbrOfOutliers)); protectCount++; } /* Read masked features (optional) */ if (i_readMasked != 0) { try { nbrOfMasked = cel.GetNumMasked(); } 
catch(affymetrix_calvin_exceptions::CalvinException& ex) { UNPROTECT(protectCount); error("[affxparser Fusion SDK exception] Failed to parse CEL file: %s\n", celFileName); } if (i_verboseFlag >= R_AFFX_VERBOSE) Rprintf("Number of masked to be read: %d\n", nbrOfMasked); PROTECT(masked = NEW_INTEGER(nbrOfMasked)); protectCount++; } /** here we will store the above entries in that order. **/ SEXP result_list; SEXP names; /* Number of elements in return list */ int nbrOfElements = (i_readHeader + i_readX + i_readY + i_readIntensities + i_readStdvs + i_readPixels + i_readOutliers + i_readMasked); PROTECT(result_list = NEW_LIST(nbrOfElements)); protectCount++; PROTECT(names = NEW_CHARACTER(nbrOfElements)); protectCount++; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * For each cell * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ for (int icel = 0, index = 0; icel < nbrOfCells; icel++) { if (i_verboseFlag >= R_AFFX_VERBOSE) { if (icel % 1000 == 0 || icel == nbrOfCells-1) { Rprintf("%d/%d, ", icel+1, nbrOfCells); } } if (readAll) { index = icel; } else { /* Cell indices are zero-based in Fusion SDK */ index = INTEGER(indices)[icel] - 1; } try { if (i_verboseFlag >= R_AFFX_REALLY_VERBOSE) { Rprintf("index: %d, x: %d, y: %d, intensity: %f, stdv: %f, pixels: %d\n", index, cel.IndexToX(index), cel.IndexToY(index), cel.GetIntensity(index), cel.GetStdv(index), cel.GetPixels(index)); } /* Read X and Y (optional) */ if (i_readX != 0) { INTEGER(xvals)[icel] = cel.IndexToX(index); } if (i_readY != 0) { INTEGER(yvals)[icel] = cel.IndexToY(index); } if (i_readIntensities != 0) { REAL(intensities)[icel] = cel.GetIntensity(index); } /* Read standard deviations (optional) */ if (i_readStdvs != 0) { REAL(stdvs)[icel] = cel.GetStdv(index); } /* Read number of pixels (optional) */ if (i_readPixels != 0) { INTEGER(pixels)[icel] = cel.GetPixels(index); } } catch(affymetrix_calvin_exceptions::CalvinException& ex) { UNPROTECT(protectCount); 
error("[affxparser Fusion SDK exception] Failed to parse CEL file: %s\n", celFileName); } /* Read outlier features (optional) */ if (i_readOutliers != 0) { bool isOutlier; try { isOutlier = cel.IsOutlier(index); } catch(affymetrix_calvin_exceptions::CalvinException& ex) { UNPROTECT(protectCount); error("[affxparser Fusion SDK exception] Failed to parse CEL file: %s\n", celFileName); } if (isOutlier) { if (outliersCount >= nbrOfOutliers) error("Internal error: Too many cells flagged as outliers."); /* Cell indices are one-based in R */ INTEGER(outliers)[outliersCount++] = index + 1; } } /* Read masked features (optional) */ if (i_readMasked != 0) { bool isMasked; try { isMasked = cel.IsMasked(index); } catch(affymetrix_calvin_exceptions::CalvinException& ex) { UNPROTECT(protectCount); error("[affxparser Fusion SDK exception] Failed to parse CEL file: %s\n", celFileName); } if (isMasked) { if (maskedCount >= nbrOfMasked) error("Internal error: Too many cells flagged as masked."); /* Cell indices are one-based in R */ INTEGER(masked)[maskedCount++] = index + 1; } } } /* for (int icel ...) */ /** resize here if we only read part of the cel then we only want the outliers which correspond to that part. **/ if (i_readOutliers != 0) { if (outliersCount == 0) { outliers = R_NilValue; } else if (outliersCount < nbrOfOutliers) { PROTECT(SET_LENGTH(outliers, outliersCount)); protectCount++; } } if (i_readMasked != 0) { if (maskedCount == 0) { masked = R_NilValue; } else if (maskedCount < nbrOfMasked) { PROTECT(SET_LENGTH(masked, maskedCount)); protectCount++; } } /** set up the names of the result list. 
**/ int jj = 0; if (i_readHeader != 0) { SET_STRING_ELT(names, jj, mkChar("header")); SET_VECTOR_ELT(result_list, jj++, header); } if (i_readX != 0) { SET_STRING_ELT(names, jj, mkChar("x")); SET_VECTOR_ELT(result_list, jj++, xvals); } if (i_readY != 0) { SET_STRING_ELT(names, jj, mkChar("y")); SET_VECTOR_ELT(result_list, jj++, yvals); } if (i_readIntensities != 0) { SET_STRING_ELT(names, jj, mkChar("intensities")); SET_VECTOR_ELT(result_list, jj++, intensities); } if (i_readStdvs != 0) { SET_STRING_ELT(names, jj, mkChar("stdvs")); SET_VECTOR_ELT(result_list, jj++, stdvs); } if (i_readPixels != 0) { SET_STRING_ELT(names, jj, mkChar("pixels")); SET_VECTOR_ELT(result_list, jj++, pixels); } if (i_readOutliers != 0) { SET_STRING_ELT(names, jj, mkChar("outliers")); SET_VECTOR_ELT(result_list, jj++, outliers); } if (i_readMasked != 0) { SET_STRING_ELT(names, jj, mkChar("masked")); SET_VECTOR_ELT(result_list, jj++, masked); } /** set the names of the list entries. **/ setAttrib(result_list, R_NamesSymbol, names); if (i_verboseFlag >= R_AFFX_VERBOSE) { Rprintf("Finished reading CEL file.\n"); } /* Note: If possible, that is if we do not fill the PROTECT stack (limited to 10,000), it is much safer to UNPROTECT() everything at the end. Remember, it is a *stack* where UNPROTECT() is for the latest PROTECT():ed object; it is *not* a FIFO queue */ UNPROTECT(protectCount); return result_list; } /* R_affx_get_cel_file() */ } /** end extern "C" **/ /*************************************************************************** * HISTORY: * 2015-05-05 * o ROBUSTNESS: Now using try-catch to pass exceptions to R. * 2006-09-15 * o BUG FIX: Forgot to allocate space for NULL terminator in 'd' in call * wcstombs(d,s,n), alternatively making sure d[last] == '\0'. I added * both explicitly just in case. * 2006-03-28 * o Cell indices are now one-based. /HB * o Added validation to argument 'indices'. /HB * o Renamed argument 'indicesToRead' to 'indices' (as in R). 
/HB * 2006-03-26 * o Note: PROTECT()/UNPROTECT() is a LIFO stack and not a FIFO queue! * o Replaced 'x=lengthgets(x,n)' with macro 'SET_LENGTH(x, n)'. * o Added check to make sure assignment to vectors of outliers and masked * is not out of bound. * o Added PROTECT() to all allocVector() allocated variables in * R_affx_extract_cel_file_meta(). * o Removed integer variable 'numIndices'. It was only used once. * o Added PROTECT() around 'outliers=R_NilValue' and 'masked=R_NilValue' * just in case. * 2006-02-19 * o In R_affx_get_cel_file(): 'noutliers' and 'nmasked' are declared * 'unsigned int' (not just 'int'). * o Re-incorporated code in the affxparser devel code. * 2006-01-16 * o BUG FIX: Changed the order of elements returned to reflect the order * in the CEL file. * o BUG FIX: R_affx_get_cel_file() had a few memory bugs. Especially, the * returned list was never protected. * o Renamed a few variable to be more readable and searchable, e.g. jj. * 2006-01-13 * o BUG FIX: The FusionCELData argument was passed as copy by value instead * of copy by reference to R_affx_extract_cel_file_meta(), which made the * object to be destructed twice giving strange memory problem. 
/HB **************************************************************************/ affxparser/src/R_affx_chp_parser.cpp0000644000175200017520000006710614516003651020656 0ustar00biocbuildbiocbuild#include "FusionCHPData.h" #include "FusionCHPLegacyData.h" #include "FusionCHPMultiDataData.h" #include "FusionCHPQuantificationData.h" #include "FusionCHPQuantificationDetectionData.h" #include "FusionCHPDataAdapterInterface.h" #include "FusionCHPTilingData.h" #include "CHPReseqEntry.h" #include "StringUtils.h" #include "ParameterNameValueType.h" #include using namespace std; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_data; using namespace affymetrix_calvin_io; using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_parameter; #include #define SET_NAMED_ELT(lst, index, val, nameLst, nameVal) \ SET_ELEMENT(lst, index, val); \ SET_STRING_ELT(nameLst, index, mkChar(nameVal)) // const char * // wcs_to_cstr(std::wstring wstr) // { // #ifdef SUPPORT_MBCS // return StringUtils::ConvertWCSToMBS(wstr).c_str(); // #else // return "\0"; // #endif // } char * wcs_to_cstr(std::wstring wstr) { #ifdef SUPPORT_MBCS string str; int str_length; char* cstr; str_length = wstr.size(); cstr = Calloc(str_length+1, char); wcstombs(cstr, wstr.c_str(), str_length); cstr[str_length] = '\0'; return cstr; #else return "\0"; #endif } int R_affx_AddCHPMeta(AffymetrixGuidType fileId, wstring algName, wstring algVersion, wstring arrayType, SEXP lst, SEXP nms, int lstIdx) { char* cstr; SET_NAMED_ELT(lst, lstIdx, mkString(fileId.c_str()), nms, "FileId"); SET_NAMED_ELT(lst, lstIdx+1, mkString(cstr = wcs_to_cstr(algName)), nms, "AlgorithmName"); Free(cstr); SET_NAMED_ELT(lst, lstIdx+2, mkString(cstr = wcs_to_cstr(algVersion)), nms, "AlgorithmVersion"); Free(cstr); SET_NAMED_ELT(lst, lstIdx+3, mkString(cstr = wcs_to_cstr(arrayType)), nms, "ArrayType"); Free(cstr); return lstIdx+4; } int R_affx_AddCHPTileMeta(AffymetrixGuidType fileId, wstring algName, wstring 
/*
 * Converts a ParameterNameValueTypeList into a named R list.
 *
 * Each list element is named after the parameter and holds:
 *   - Int8/Int16/Int32  -> integer scalar
 *   - UInt8/16/32       -> NA_integer_ (with a warning)
 *   - Float             -> numeric scalar
 *   - Text (wide)       -> character scalar
 *   - Ascii             -> character scalar
 *   - anything else     -> NA_character_ (with a warning)
 *
 * Returns the (unprotected) list; the caller is responsible for protecting
 * it if further allocations follow.
 */
SEXP
R_affx_GetList(ParameterNameValueTypeList params)
{
  SEXP pLst, pNms, pVal, pName;
  int pIdx = 0, pNbr = params.size();
  char* cstr;

  PROTECT(pLst = NEW_LIST(pNbr));
  PROTECT(pNms = NEW_CHARACTER(pNbr));

  for (ParameterNameValueTypeList::iterator param = params.begin();
       param != params.end(); ++pIdx, ++param) {
    /* The name is copied into an R string so that the temporary C buffer
       can be released immediately. */
    PROTECT(pName = mkString(cstr = wcs_to_cstr(param->GetName())));
    Free(cstr);

    /* pName is a character vector of length one; its C string lives in
       the first element, and stays valid while pName is protected. */
    const char* name = CHAR(STRING_ELT(pName, 0));

    switch (param->GetParameterType()) {
    case ParameterNameValueType::Int8Type:
      PROTECT(pVal = ScalarInteger(param->GetValueInt8()));
      break;
    case ParameterNameValueType::Int16Type:
      PROTECT(pVal = ScalarInteger(param->GetValueInt16()));
      break;
    case ParameterNameValueType::Int32Type:
      PROTECT(pVal = ScalarInteger(param->GetValueInt32()));
      break;
    case ParameterNameValueType::UInt8Type:
    case ParameterNameValueType::UInt16Type:
    case ParameterNameValueType::UInt32Type:
      warning("reading 'unsigned int' parameter '%s' as NA", name);
      PROTECT(pVal = ScalarInteger(R_NaInt));
      break;
    case ParameterNameValueType::FloatType:
      PROTECT(pVal = ScalarReal(param->GetValueFloat()));
      break;
    case ParameterNameValueType::TextType:
      PROTECT(pVal = mkString(cstr = wcs_to_cstr(param->GetValueText())));
      Free(cstr);
      break;
    case ParameterNameValueType::AsciiType:
      PROTECT(pVal = mkString(param->GetValueAscii().c_str()));
      break;
    default:
      warning("unhandled type for parameter '%s'", name);
      PROTECT(pVal = ScalarString(R_NaString));
    }

    SET_NAMED_ELT(pLst, pIdx, pVal, pNms, name);
    UNPROTECT(2); /* pVal, pName */
  }

  SET_NAMES(pLst, pNms);
  UNPROTECT(2); /* pNms, pLst */
  return pLst;
}
= rFU.GetBackground(); } UNPROTECT(1); return ans; } SEXP R_affx_GetCHPGenotypingResults(FusionCHPLegacyData *chp) { SEXP rval, ras1, ras2, aa, ab, bb, nocall, call, conf, callstr, alg; int qNbr = chp->GetHeader().GetNumProbeSets(), i, nprotect=0, nelt; bool bWholeGenome = false, bDynamicModel = false; char* cstr; PROTECT(call = NEW_INTEGER(qNbr)); PROTECT(conf = NEW_NUMERIC(qNbr)); PROTECT(callstr = NEW_CHARACTER(qNbr)); nprotect = 3; //FIXME: I did not think AlgName could be "", it is, so we are stuck //with that PROTECT(alg = mkString(cstr = wcs_to_cstr(chp->GetHeader().GetAlgName()))); Free(cstr); nprotect++; if(chp->GetHeader().GetAlgName() == L"WholeGenome") { bWholeGenome = true; PROTECT(ras1 = NEW_NUMERIC(qNbr)); PROTECT(ras2 = NEW_NUMERIC(qNbr)); nprotect+=2; Rprintf("dudey"); } else if(chp->GetHeader().GetAlgName() == L"DynamicModel") { bDynamicModel = true; PROTECT(aa = NEW_NUMERIC(qNbr)); PROTECT(ab = NEW_NUMERIC(qNbr)); PROTECT(bb = NEW_NUMERIC(qNbr)); PROTECT(nocall = NEW_NUMERIC(qNbr)); nprotect+=4; Rprintf("howdy"); } FusionGenotypeProbeSetResults f; for(i=0; iGetGenotypingResults(i, f); INTEGER(call)[i] = f.GetAlleleCall(); SET_STRING_ELT(callstr, i, mkChar(f.GetAlleleCallString().c_str())); REAL(conf)[i] = f.GetConfidence(); if( bWholeGenome ) { REAL(ras1)[i] = f.GetRAS1(); REAL(ras2)[i] = f.GetRAS2(); } if( bDynamicModel ) { REAL(aa)[i] = f.GetPValueAA(); REAL(ab)[i] = f.GetPValueAB(); REAL(bb)[i] = f.GetPValueBB(); REAL(nocall)[i] = f.GetPValueNoCall(); } } if( bWholeGenome ) nelt = 6; else if(bDynamicModel) nelt = 8; else nelt = 4; PROTECT(rval = NEW_LIST(nelt)); nprotect++; SET_ELEMENT(rval, 0, call); SET_ELEMENT(rval, 1, conf); SET_ELEMENT(rval, 2, callstr); if( bWholeGenome ) { SET_ELEMENT(rval, 3, mkString("WholeGenome")); SET_ELEMENT(rval, 4, ras1); SET_ELEMENT(rval, 5, ras1); } else if( bDynamicModel ) { SET_ELEMENT(rval, 3, mkString("DynamicModel")); SET_ELEMENT(rval, 4, aa); SET_ELEMENT(rval, 5, ab); SET_ELEMENT(rval, 6, bb); 
SET_ELEMENT(rval, 7, nocall); } else { SET_ELEMENT(rval, 3, mkString("None")); } SEXP nms; PROTECT(nms = NEW_CHARACTER(nelt)); nprotect++; SET_STRING_ELT(nms, 0, mkChar("Call")); SET_STRING_ELT(nms, 1, mkChar("Confidence")); SET_STRING_ELT(nms, 2, mkChar("AlleleString")); SET_STRING_ELT(nms, 3, mkChar("AlgName")); if( bWholeGenome ) { SET_STRING_ELT(nms, 4, mkChar("RAS1")); SET_STRING_ELT(nms, 5, mkChar("RAS2")); } else if (bDynamicModel) { SET_STRING_ELT(nms, 4, mkChar("PvalueAA")); SET_STRING_ELT(nms, 5, mkChar("PvalueAB")); SET_STRING_ELT(nms, 6, mkChar("PvalueBB")); SET_STRING_ELT(nms, 7, mkChar("PvalueNoCall")); } SET_NAMES(rval, nms); UNPROTECT(nprotect); return rval; } SEXP R_affx_GetCHPExpressionResults(FusionCHPLegacyData *chp) { SEXP detectionPValue, signal, numPairs, numUsedPairs, detection, hasCompResults, changePValue, signalLogRatio, signalLogRatioLow, signalLogRatioHigh, numCommonPairs, change; int qNbr=chp->GetHeader().GetNumProbeSets(); PROTECT(detectionPValue = NEW_NUMERIC(qNbr)); PROTECT(signal = NEW_NUMERIC(qNbr)); PROTECT(numPairs = NEW_INTEGER(qNbr)); PROTECT(numUsedPairs = NEW_INTEGER(qNbr)); PROTECT(detection = NEW_INTEGER(qNbr)); PROTECT(hasCompResults = NEW_LOGICAL(qNbr)); PROTECT(changePValue = NEW_NUMERIC(qNbr)); PROTECT(signalLogRatio = NEW_NUMERIC(qNbr)); PROTECT(signalLogRatioLow = NEW_NUMERIC(qNbr)); PROTECT(signalLogRatioHigh = NEW_NUMERIC(qNbr)); PROTECT(numCommonPairs = NEW_INTEGER(qNbr)); PROTECT(change = NEW_INTEGER(qNbr)); // FIXME: probe set names need to come from PSI file // FIXME: very inefficient -- function calls for each assignment FusionExpressionProbeSetResults psResults; for (int qIdx=0; qIdxGetExpressionResults(qIdx, psResults); REAL(detectionPValue)[qIdx] = psResults.GetDetectionPValue(); REAL(signal)[qIdx] = psResults.GetSignal(); INTEGER(numPairs)[qIdx] = psResults.GetNumPairs(); INTEGER(numUsedPairs)[qIdx] = psResults.GetNumUsedPairs(); INTEGER(detection)[qIdx] = psResults.GetDetection(); 
LOGICAL(hasCompResults)[qIdx] = (psResults.HasCompResults() ? TRUE : FALSE); REAL(changePValue)[qIdx] = psResults.GetChangePValue(); REAL(signalLogRatio)[qIdx] = psResults.GetSignalLogRatio(); REAL(signalLogRatioLow)[qIdx] = psResults.GetSignalLogRatioLow(); REAL(signalLogRatioHigh)[qIdx] = psResults.GetSignalLogRatioHigh(); INTEGER(numCommonPairs)[qIdx] = psResults.GetNumCommonPairs(); INTEGER(change)[qIdx] = psResults.GetChange(); } SEXP result; PROTECT(result = NEW_LIST(12)); SET_ELEMENT(result, 0, detectionPValue); SET_ELEMENT(result, 1, signal); SET_ELEMENT(result, 2, numPairs); SET_ELEMENT(result, 3, numUsedPairs); SET_ELEMENT(result, 4, detection); SET_ELEMENT(result, 5, hasCompResults); SET_ELEMENT(result, 6, changePValue); SET_ELEMENT(result, 7, signalLogRatio); SET_ELEMENT(result, 8, signalLogRatioLow); SET_ELEMENT(result, 9, signalLogRatioHigh); SET_ELEMENT(result, 10, numCommonPairs); SET_ELEMENT(result, 11, change); SEXP nms; PROTECT(nms = NEW_CHARACTER(12)); SET_STRING_ELT(nms, 0, mkChar("DetectionPValue")); SET_STRING_ELT(nms, 1, mkChar("Signal")); SET_STRING_ELT(nms, 2, mkChar("NumPairs")); SET_STRING_ELT(nms, 3, mkChar("NumUsedPairs")); SET_STRING_ELT(nms, 4, mkChar("Detection")); SET_STRING_ELT(nms, 5, mkChar("HasCompResults")); SET_STRING_ELT(nms, 6, mkChar("ChangePValue")); SET_STRING_ELT(nms, 7, mkChar("SignalLogRatio")); SET_STRING_ELT(nms, 8, mkChar("SignalLogRatioLow")); SET_STRING_ELT(nms, 9, mkChar("SignalLogRatioHigh")); SET_STRING_ELT(nms, 10, mkChar("NumCommonPairs")); SET_STRING_ELT(nms, 11, mkChar("Change")); SET_NAMES(result, nms); UNPROTECT(14); return result; } SEXP R_affx_GetCHPEntries(FusionCHPQuantificationData *qData) { SEXP qVec, qNm, qId; int qNbr = qData->GetEntryCount(); PROTECT(qVec = NEW_NUMERIC(qNbr)); PROTECT(qNm = NEW_CHARACTER(qNbr)); PROTECT(qId = NEW_INTEGER(qNbr)); ProbeSetQuantificationData psData; for (int qIdx=0; qIdx < qNbr; ++qIdx) { qData->GetQuantificationEntry(qIdx, psData); SET_STRING_ELT(qNm, qIdx, 
mkChar(psData.name.c_str())); INTEGER(qId)[qIdx] = psData.id; REAL(qVec)[qIdx] = psData.quantification; } SEXP result; PROTECT(result = NEW_LIST(2)); SET_ELEMENT(result, 0, qNm); SET_ELEMENT(result, 1, qVec); SEXP nms; PROTECT(nms = NEW_CHARACTER(2)); SET_STRING_ELT(nms, 0, mkChar("ProbeSetName")); SET_STRING_ELT(nms, 1, mkChar("QuantificationValue")); SET_NAMES(result, nms); UNPROTECT(5); return result; } SEXP R_affx_GetCHPEntries(FusionCHPQuantificationDetectionData *qData) { SEXP qVec, pValueVec, qNm, qID; int qNbr = qData->GetEntryCount(); PROTECT(qVec = NEW_NUMERIC(qNbr)); PROTECT(pValueVec = NEW_NUMERIC(qNbr)); PROTECT(qNm = NEW_CHARACTER(qNbr)); PROTECT(qID = NEW_INTEGER(qNbr)); double *q = NUMERIC_POINTER(qVec);; double *pValue = NUMERIC_POINTER(pValueVec); ProbeSetQuantificationDetectionData psData; for (int qIdx=0; qIdx < qNbr; ++qIdx) { qData->GetQuantificationDetectionEntry(qIdx, psData); q[qIdx] = psData.quantification; pValue[qIdx] = psData.pvalue; INTEGER(qID)[qIdx] = psData.id; SET_STRING_ELT(qNm, qIdx, mkChar(psData.name.c_str())); } SEXP result; PROTECT(result = NEW_LIST(4)); SET_ELEMENT(result, 0, qNm); SET_ELEMENT(result, 1, qVec); SET_ELEMENT(result, 2, pValueVec); SET_ELEMENT(result, 3, qID); SEXP nms; PROTECT(nms = NEW_CHARACTER(4)); SET_STRING_ELT(nms, 0, mkChar("ProbeSetName")); SET_STRING_ELT(nms, 1, mkChar("QuantificationValue")); SET_STRING_ELT(nms, 2, mkChar("PValue")); SET_STRING_ELT(nms, 3, mkChar("ID")); SET_NAMES(result, nms); UNPROTECT(6); return result; } int R_affx_AddCHPEntries(FusionCHPQuantificationData *chp, SEXP lst, SEXP nms, int lstIdx, bool isBrief) { if (isBrief == false) { SEXP quantEntries = PROTECT(R_affx_GetCHPEntries(chp)); SET_ELEMENT(lst, lstIdx, quantEntries); UNPROTECT(1); } SET_STRING_ELT(nms, lstIdx, mkChar("QuantificationEntries")); return lstIdx + 1; } int R_affx_AddCHPEntries(FusionCHPQuantificationDetectionData *chp, SEXP lst, SEXP nms, int lstIdx, bool isBrief) { if (isBrief == false) { SEXP quantEntries 
= PROTECT(R_affx_GetCHPEntries(chp)); SET_ELEMENT(lst, lstIdx, quantEntries); UNPROTECT(1); } SET_STRING_ELT(nms, lstIdx, mkChar("QuantificationEntries")); return lstIdx + 1; } SEXP R_affx_ReadCHP(FusionCHPLegacyData *chp, bool isBrief) { SEXP lst, nms; int lstIdx = 0, lstNbr = 7; PROTECT(lst = NEW_LIST(lstNbr)); PROTECT(nms = NEW_CHARACTER(lstNbr)); FusionCHPHeader header = chp->GetHeader(); lstIdx = R_affx_AddCHPMeta(chp->FileId(), header.GetAlgName(), header.GetAlgVersion(), header.GetChipType(), lst, nms, lstIdx); FusionTagValuePairTypeList params; header.AlgorithmParameters(params); lstIdx = R_affx_AddList(params, lst, nms, lstIdx, "AlgorithmParameters"); header.SummaryParameters(params); lstIdx = R_affx_AddList(params, lst, nms, lstIdx, "SummaryParameters"); if (isBrief == false) { SEXP quantEntries; switch(header.GetAssayType()) { case FusionExpression: PROTECT(quantEntries = R_affx_GetCHPExpressionResults(chp)); break; case FusionResequencing: PROTECT(quantEntries = R_affx_GetCHPReseqResults(chp)); break; case FusionGenotyping: PROTECT(quantEntries = R_affx_GetCHPGenotypingResults(chp)); break; case FusionUniversal: PROTECT(quantEntries = R_affx_GetCHPUniversalResults(chp)); break; case FusionUnknown: default: warning("unhandled quantification entry index '%d'", header.GetAssayType()); PROTECT(quantEntries = NEW_NUMERIC(0)); } SET_NAMED_ELT(lst, lstIdx, quantEntries, nms, "QuantificationEntries"); ++lstIdx; UNPROTECT(1); } SET_NAMES(lst, nms); UNPROTECT(2); return(lst); } SEXP R_affx_ReadTilingDataSeqHeader(TilingSequenceData seq) { SEXP header, hnames; char* cstr; //read in the header/params PROTECT(header = NEW_LIST(4)); PROTECT(hnames = NEW_CHARACTER(4)); SET_NAMED_ELT(header, 0, mkString(cstr = wcs_to_cstr(seq.name)), hnames, "name"); Free(cstr); SET_NAMED_ELT(header, 1, mkString(cstr = wcs_to_cstr(seq.groupName)), hnames, "groupName"); Free(cstr); SET_NAMED_ELT(header, 2, mkString(cstr = wcs_to_cstr(seq.version)), hnames, "version"); Free(cstr); 
SET_NAMED_ELT(header, 3, R_affx_GetList(seq.parameters), hnames, "parameters"); SET_NAMES(header, hnames); UNPROTECT(2); return header; } SEXP R_affx_ReadTilingDataSeqEntries(FusionCHPTilingData *chp, int Entry) { int eCount, i; SEXP rval, position, value, nms; CHPTilingEntry e; eCount = chp->GetTilingSequenceEntryCount(Entry); PROTECT(position = NEW_INTEGER(eCount)); PROTECT(value = NEW_NUMERIC(eCount)); for(i=0; iGetTilingSequenceEntry(i, e); INTEGER(position)[i] = e.position; REAL(value)[i] =e.value; } PROTECT(rval = NEW_LIST(2)); SET_ELEMENT(rval, 0, position); SET_ELEMENT(rval, 1, value); PROTECT(nms = NEW_CHARACTER(2)); SET_STRING_ELT(nms, 0, mkChar("position")); SET_STRING_ELT(nms, 1, mkChar("value")); SET_NAMES(rval, nms); UNPROTECT(4); return rval; } SEXP R_affx_ReadCHP(FusionCHPTilingData *chp, bool isBrief) { SEXP lst, nms, seqList, seqi, seqiNms, tmp; int lstIdx = 0, lstNbr = 6, numSeq=0, i; PROTECT(lst = NEW_LIST(lstNbr)); PROTECT(nms = NEW_CHARACTER(lstNbr)); lstIdx = R_affx_AddCHPTileMeta(chp->FileId(), chp->GetAlgName(), chp->GetAlgVersion(), lst, nms, lstIdx); SET_NAMED_ELT(lst, lstIdx, R_affx_GetList(chp->GetAlgParams()), nms, "AlgorithmParameters"); lstIdx++; numSeq = chp->GetNumberSequences(); PROTECT(tmp = NEW_INTEGER(1)); INTEGER(tmp)[0] = numSeq; SET_NAMED_ELT(lst, lstIdx, tmp, nms, "NumberofSequences"); lstIdx++; UNPROTECT(1); PROTECT(seqList = NEW_LIST(numSeq)); for(i=0; iOpenTilingSequenceDataSet(i); PROTECT(seqi = NEW_LIST(2)); SET_ELEMENT(seqi, 0, R_affx_ReadTilingDataSeqHeader(chp->GetTilingSequenceData())); SET_ELEMENT(seqi, 1, R_affx_ReadTilingDataSeqEntries(chp, i)); PROTECT(seqiNms = NEW_CHARACTER(2)); SET_STRING_ELT(seqiNms, 0, mkChar("seq")); SET_STRING_ELT(seqiNms, 1, mkChar("entries")); SET_NAMES(seqi, seqiNms); SET_ELEMENT(seqList, i, seqi); UNPROTECT(2); } SET_NAMED_ELT(lst, lstIdx, seqList, nms, "Sequences"); lstIdx++; SET_NAMES(lst, nms); UNPROTECT(3); return(lst); } SEXP R_affx_ReadCHP(FusionCHPQuantificationData *chp, bool 
isBrief) { SEXP lst, nms, nQ; int lstIdx = 0, lstNbr = 8, qNbr; PROTECT(lst = NEW_LIST(lstNbr)); PROTECT(nms = NEW_CHARACTER(lstNbr)); lstIdx = R_affx_AddCHPMeta(chp->FileId(), chp->GetAlgName(), chp->GetAlgVersion(), chp->GetArrayType(), lst, nms, lstIdx); SET_NAMED_ELT(lst, lstIdx, R_affx_GetList(chp->GetAlgParams()), nms, "AlgorithmParameters"); lstIdx++; SET_NAMED_ELT(lst, lstIdx, R_affx_GetList(chp->GetSummaryParams()), nms, "SummaryParameters"); lstIdx++; qNbr = chp->GetEntryCount(); PROTECT(nQ = NEW_INTEGER(1)); INTEGER(nQ)[0] = qNbr; SET_NAMED_ELT(lst, lstIdx, nQ, nms, "NumQuantificationEntries"); lstIdx++; lstIdx = R_affx_AddCHPEntries(chp, lst, nms, lstIdx, isBrief); SET_NAMES(lst, nms); UNPROTECT(3); return(lst); } /* the enumerated data types for a MultiData object ExpressionMultiDataType ExpressionControlMultiDataType GenotypeMultiDataType GenotypeControlMultiDataType CopyNumberMultiDataType CytoMultiDataType */ SEXP R_affx_ReadCHP(FusionCHPMultiDataData *chp, bool isBrief) { SEXP lst, nms, cts, conf, call, probenames, genodata, gnms; int lstIdx = 0, lstNbr, nExpr, nExprC, nGeno, nGenoC, nprotect = 0, i, nDataTypes; PROTECT(cts = NEW_INTEGER(4)); //FIXME: change if enum changes nprotect++; INTEGER(cts)[0] = nExpr = chp->GetEntryCount(ExpressionMultiDataType); INTEGER(cts)[1] = nExprC = chp->GetEntryCount(ExpressionControlMultiDataType); INTEGER(cts)[2] = nGeno = chp->GetEntryCount(GenotypeMultiDataType); INTEGER(cts)[3] = nGenoC = chp->GetEntryCount(GenotypeControlMultiDataType); PROTECT(nms = NEW_CHARACTER(4)); SET_STRING_ELT(nms, 0, mkChar("Expression")); SET_STRING_ELT(nms, 1, mkChar("ExpressionControl")); SET_STRING_ELT(nms, 2, mkChar("Genotype")); SET_STRING_ELT(nms, 3, mkChar("GenotypeControl")); SET_NAMES(cts, nms); UNPROTECT(1); /* for now ignore the next two, as affxparser seems to be using an old version of some files INTEGER(cts)[4] = nCopy = chp->GetEntryCount(CopyNumberMultiDataType); INTEGER(cts)[5] = nCyto = 
chp->GetEntryCount(CytoMultiDataType); */ nDataTypes = 0; if(nExpr > 0) nDataTypes++; if(nExprC > 0) nDataTypes++; if(nGeno > 0) nDataTypes++; if(nGenoC > 0) nDataTypes++; lstNbr = nDataTypes + 7; PROTECT(lst = NEW_LIST(lstNbr)); PROTECT(nms = NEW_CHARACTER(lstNbr)); nprotect+=2; lstIdx = R_affx_AddCHPMeta(chp->FileId(), chp->GetAlgName(), chp->GetAlgVersion(), chp->GetArrayType(), lst, nms, lstIdx); SET_NAMED_ELT(lst, lstIdx, R_affx_GetList(chp->GetAlgParams()), nms, "AlgorithmParameters"); lstIdx++; SET_NAMED_ELT(lst, lstIdx, R_affx_GetList(chp->GetSummaryParams()), nms, "SummaryParameters"); lstIdx++; SET_NAMED_ELT(lst, lstIdx, cts, nms, "MultiDataTypeCounts"); lstIdx++; if(nGeno > 0) { PROTECT(conf = NEW_NUMERIC(nGeno)); PROTECT(call = NEW_INTEGER(nGeno)); //should be char vector PROTECT(probenames = NEW_CHARACTER(nGeno)); for (i = 0; i < nGeno; i++) { INTEGER(call)[i] = chp->GetGenoCall(GenotypeMultiDataType, i); REAL(conf)[i] = chp->GetGenoConfidence(GenotypeMultiDataType, i); SET_STRING_ELT(probenames, i, mkChar(chp->GetProbeSetName(GenotypeMultiDataType, i).c_str())); } PROTECT(genodata = NEW_LIST(3)); PROTECT(gnms = NEW_CHARACTER(3)); SET_NAMED_ELT(genodata, 0, call, gnms, "Call"); SET_NAMED_ELT(genodata, 1, conf, gnms, "Confidence"); SET_NAMED_ELT(genodata, 2, probenames, gnms, "ProbeNames"); SET_NAMES(genodata, gnms); SET_NAMED_ELT(lst, lstIdx, genodata, nms, "Genotype"); UNPROTECT(5); } SET_NAMES(lst, nms); UNPROTECT(nprotect); return(lst); } SEXP R_affx_ReadCHP(FusionCHPQuantificationDetectionData *chp, bool isBrief) { SEXP lst, nms; int lstIdx = 0, lstNbr = 7; PROTECT(lst = NEW_LIST(lstNbr)); PROTECT(nms = NEW_CHARACTER(lstNbr)); lstIdx = R_affx_AddCHPMeta(chp->FileId(), chp->GetAlgName(), chp->GetAlgVersion(), chp->GetArrayType(), lst, nms, lstIdx); lstIdx = R_affx_AddList(chp->GetAlgParams(), lst, nms, lstIdx, "AlgorithmParameters"); lstIdx = R_affx_AddList(chp->GetSummaryParams(), lst, nms, lstIdx, "SummaryParameters"); lstIdx = 
R_affx_AddCHPEntries(chp, lst, nms, lstIdx, isBrief); SET_NAMES(lst, nms); UNPROTECT(2); return(lst); } extern "C" { SEXP R_affx_get_chp_file(SEXP fname, SEXP withQuantifications) { if (IS_CHARACTER(fname) == FALSE || LENGTH(fname) != 1) error("argument '%s' should be '%s'", "fname", "character(1)"); if (IS_LOGICAL(withQuantifications) == FALSE || LENGTH(withQuantifications) != 1) error("argument '%s' should be '%s'", "withQuantifications", "logical(1)"); const char *chpFileName = CHAR(STRING_ELT(fname, 0)); bool isBrief = (LOGICAL(withQuantifications)[0] == TRUE ? false : true), processed = false; int protectionCount = 0; SEXP result = R_NilValue; FusionCHPData *chp = FusionCHPDataReg::Read(chpFileName); if (chp == NULL) error("could not read '%s'", chpFileName); if (processed == false) { FusionCHPLegacyData *lChp = FusionCHPLegacyData::FromBase(chp); if (lChp != NULL) { processed = true; PROTECT(result = R_affx_ReadCHP(lChp, isBrief)); ++protectionCount; delete lChp; } } if (processed==false) { FusionCHPQuantificationData *qChp = FusionCHPQuantificationData::FromBase(chp); if (qChp != NULL) { processed = true; PROTECT(result = R_affx_ReadCHP(qChp, isBrief)); ++protectionCount; delete qChp; } } if (processed==false) { FusionCHPQuantificationDetectionData *qdChp = FusionCHPQuantificationDetectionData::FromBase(chp); if (qdChp != NULL) { processed = true; PROTECT(result = R_affx_ReadCHP(qdChp, isBrief)); ++protectionCount; delete qdChp; } } if (processed==false) { FusionCHPTilingData *tChp = FusionCHPTilingData::FromBase(chp); if(tChp != NULL) { processed = true; PROTECT(result = R_affx_ReadCHP(tChp, isBrief)); ++protectionCount; delete tChp; } } if (processed==false) { FusionCHPMultiDataData *mChp = FusionCHPMultiDataData::FromBase(chp); if(mChp != NULL) { processed = true; PROTECT(result = R_affx_ReadCHP(mChp, isBrief)); ++protectionCount; delete mChp; } } if (processed==false) { warning("unable to read CHP file '%s'", chpFileName); delete chp; } 
UNPROTECT(protectionCount); return result; } } affxparser/src/R_affx_clf_pgf_parser.cpp0000644000175200017520000002432114516003651021474 0ustar00biocbuildbiocbuild// FIXME: TsvFile.cpp:444 has _strtoui64 on WIN32; minGW wants strtoull #include "ClfFile.h" #include "PgfFile.h" #include "TsvFile.h" #include using namespace std; using namespace affx; #include "RAffxErrHandler.h" #include int * new_int_elt(const char* symbol, int length, SEXP rho) { SEXP tmp; PROTECT(tmp = NEW_INTEGER(length)); defineVar(install(symbol), tmp, rho); UNPROTECT(1); return INTEGER(tmp); } SEXP new_char_elt(const char* symbol, int length, SEXP rho) { SEXP tmp; PROTECT(tmp = NEW_CHARACTER(length)); defineVar(install(symbol), tmp, rho); UNPROTECT(1); return tmp; } SEXP R_affx_read_tsv_header(TsvFile& tsv) { string key, value; tsv.headersBegin(); // how many chip_type and other headers? int nOtherHeaders = 0, nChipTypeHeaders = 0; while (tsv.headersNext(key, value) == TSV_OK) { if (key=="chip_type") { ++nChipTypeHeaders; } else { ++nOtherHeaders; } } SEXP headers, headerNames, chipTypeHeaders; PROTECT(headers = NEW_LIST(nOtherHeaders+1)); PROTECT(headerNames = NEW_CHARACTER(nOtherHeaders+1)); PROTECT(chipTypeHeaders = NEW_CHARACTER(nChipTypeHeaders)); tsv.headersBegin(); nChipTypeHeaders = 0; nOtherHeaders = 1; while (tsv.headersNext(key, value) == TSV_OK) { if (key=="chip_type") { SET_STRING_ELT(chipTypeHeaders, nChipTypeHeaders++, mkChar(value.c_str())); } else { SET_ELEMENT(headers, nOtherHeaders, mkString(value.c_str())); SET_STRING_ELT(headerNames, nOtherHeaders, mkChar(key.c_str())); ++nOtherHeaders; } } SET_ELEMENT(headers, 0, chipTypeHeaders); SET_STRING_ELT(headerNames, 0, mkChar("chip_type")); SET_NAMES(headers, headerNames); UNPROTECT(3); return headers; } void R_affx_get_body(ClfFile* clf, SEXP rho) { int nx, ny; nx = clf->getXMax() + 1; ny = clf->getYMax() + 1; int *dims, *id, *x, *y; dims = new_int_elt("dims", 2, rho); id = new_int_elt("id", nx*ny, rho); x = new_int_elt("x", 
nx*ny, rho); y = new_int_elt("y", nx*ny, rho); dims[0] = nx; dims[1] = ny; while(clf->next_probe() == TSV_OK) { *id++ = clf->probe_id; *x++ = clf->x; *y++ = clf->y; } } void R_affx_get_body(PgfFile* pgf, SEXP rho, SEXP indices) { int nProbesets, nAtoms, nProbes; int i, prevIndex, currIndex, nextIndex=0, maxIndex; bool readAll = (indices == R_NilValue); int *pindices; // Argument 'indices' // (a) Find maximum index requested to allow for early stopping if (readAll) { maxIndex = R_LEN_T_MAX; } else { pindices = INTEGER(indices); prevIndex = 0; maxIndex = 0; for (i=0; i < length(indices); i++) { currIndex = pindices[i]; if (currIndex == prevIndex) { error("Argument 'indices' must not contain duplicated entries: %d", currIndex); } else if (currIndex < prevIndex) { error("Argument 'indices' must be sorted."); } else if (currIndex > maxIndex) { maxIndex = currIndex; } prevIndex = currIndex; } } // (b) Count the number of (probesets, atoms, probes) needed nProbesets = 0, nAtoms = 0, nProbes = 0; if (!readAll) currIndex = pindices[0]; i = 0; while (nProbesets < maxIndex && pgf->next_probeset() == TSV_OK) { ++nProbesets; if (!readAll) { // Don't read this probeset? if (nProbesets < currIndex) continue; // Next index currIndex = pindices[++i]; } while (pgf->next_atom() == TSV_OK) { ++nAtoms; while (pgf->next_probe() == TSV_OK) { ++nProbes; } } // No need to continue? 
if (nProbesets >= maxIndex) break; } maxIndex = nProbesets; pgf->rewind(); // (c) Setup/validate 'indices' if (readAll) { // indices <- 1:maxIndex PROTECT(indices = allocVector(INTSXP, maxIndex)); pindices = INTEGER(indices); for (i=0; i < length(indices); i++) { pindices[i] = i+1; } } else { for (i=0; i < length(indices); i++) { currIndex = pindices[i]; if (currIndex <= 0) { error("Argument 'indices' contains a non-positive element: %d", currIndex); } else if (currIndex > maxIndex) { error("Argument 'indices' contains an element out of range [1,%d]: %d", maxIndex, currIndex); } } } // (d) Allocate (probesets, atoms, probes) SEXP probeset_type, probeset_name, // atom_type, probe_type, probe_sequence; int *probeset_id, *probeset_start_atom, *atom_id, *atom_exon_position, *atom_start_probe, *probe_id, *probe_gc_count, *probe_length, *probe_interrogation_position; // probeset probeset_id = new_int_elt("probesetId", length(indices), rho); probeset_type = new_char_elt("probesetType", length(indices), rho); probeset_name = new_char_elt("probesetName", length(indices), rho); probeset_start_atom = new_int_elt("probesetStartAtom", length(indices), rho); // atom atom_id = new_int_elt("atomId", nAtoms, rho); // FIXME: where's atom_type? 
in docs but not .h or .cpp // atom_type = new_char_elt("atomType", nAtoms, rho); atom_exon_position = new_int_elt("atomExonPosition", nAtoms, rho); atom_start_probe = new_int_elt("atomStartProbe", nAtoms, rho); // probe probe_id = new_int_elt("probeId", nProbes, rho); probe_type = new_char_elt("probeType", nProbes, rho); probe_gc_count = new_int_elt("probeGcCount", nProbes, rho); probe_length = new_int_elt("probeLength", nProbes, rho); probe_interrogation_position = new_int_elt("probeInterrogationPosition", nProbes, rho); probe_sequence = new_char_elt("probeSequence", nProbes, rho); // (e) Read (probesets, atoms, probes) nProbesets = 0; nAtoms = nProbes = 0; for (i=0; i < length(indices); i++) { // Next index to read nextIndex = pindices[i]; // Skip to probeset of interest. while (nProbesets < nextIndex && pgf->next_probeset() == TSV_OK) { ++nProbesets; } // Sanity check if (nProbesets < nextIndex) { error("INTERNAL ERROR: Expected %d more probesets to skip in PGF file, but reached end of file.", nextIndex-nProbesets); } // Read probeset probeset_id[i] = pgf->probeset_id; SET_STRING_ELT(probeset_type, i, mkChar(pgf->probeset_type.c_str())); SET_STRING_ELT(probeset_name, i, mkChar(pgf->probeset_name.c_str())); probeset_start_atom[i] = 1 + nAtoms; while (pgf->next_atom() == TSV_OK) { atom_id[nAtoms] = pgf->atom_id; // FIXME: where's atom_type? in docs but not header atom_exon_position[nAtoms] = pgf->exon_position; atom_start_probe[nAtoms] = 1 + nProbes; ++nAtoms; while (pgf->next_probe() == TSV_OK) { probe_id[nProbes] = pgf->probe_id; SET_STRING_ELT(probe_type, nProbes, mkChar(pgf->probe_type.c_str())); probe_gc_count[nProbes] = pgf->gc_count; probe_length[nProbes] = pgf->probe_length; probe_interrogation_position[nProbes] = pgf->interrogation_position; SET_STRING_ELT(probe_sequence, nProbes, mkChar(pgf->probe_sequence.c_str())); ++nProbes; } // while (pgf->next_probe() == TSV_OK) } // while (pgf->next_atom() == TSV_OK) } // for (i=0; ...) 
if (readAll) UNPROTECT(1); // Temporarily allocated 'indices'. } extern "C" { SEXP R_affx_get_clf_file(SEXP fname, SEXP readBody, SEXP rho) { if (IS_CHARACTER(fname) == FALSE || LENGTH(fname) != 1) error("argument '%s' should be '%s'", "fname", "character(1)"); if (IS_LOGICAL(readBody) == FALSE || LENGTH(readBody) !=1) error("argument '%s' should be '%s'", "readBody", "logical(1)"); if (TYPEOF(rho) != ENVSXP) error("argument '%' should be '%s'", "rho", "environment"); const char *clfFileName = CHAR(STRING_ELT(fname, 0)); ClfFile *clf = new ClfFile(); // FIXME: shortcut with sequential headers -- id, x, y are sequences try { RAffxErrHandler *err = new RAffxErrHandler(true); Err::pushHandler(err); if (clf->open(string(clfFileName)) != TSV_OK) { delete clf; error("could not open clf file '%s'", clfFileName); } // header SEXP tmp; PROTECT(tmp = R_affx_read_tsv_header(clf->m_tsv)); defineVar(install("header"), tmp, rho); UNPROTECT(1); if (LOGICAL(readBody)[0] == TRUE) { R_affx_get_body(clf, rho); } delete Err::popHandler(); } catch (Except& ex) { delete Err::popHandler(); clf->close(); delete clf; error("%s", ex.what()); } clf->close(); delete clf; return rho; } SEXP R_affx_get_pgf_file(SEXP fname, SEXP readBody, SEXP rho, SEXP indices) { if (IS_CHARACTER(fname) == FALSE || LENGTH(fname) != 1) error("argument '%s' should be '%s'", "fname", "character(1)"); if (IS_LOGICAL(readBody) == FALSE || LENGTH(readBody) != 1) error("argument '%s' should be '%s'", "readBody", "logical(1)"); if (TYPEOF(rho) != ENVSXP) error("argument '%' should be '%s'", "rho", "environments"); const char *pgfFileName = CHAR(STRING_ELT(fname, 0)); PgfFile *pgf = new PgfFile(); try { RAffxErrHandler *err = new RAffxErrHandler(true); Err::pushHandler(err); if (pgf->open(string(pgfFileName)) != TSV_OK) { delete pgf; error("could not open pgf file '%s'", pgfFileName); } SEXP tmp; PROTECT(tmp = R_affx_read_tsv_header(pgf->m_tsv)); defineVar(install("header"), tmp, rho); UNPROTECT(1); if 
(LOGICAL(readBody)[0] == TRUE) { R_affx_get_body(pgf, rho, indices); } pgf->close(); delete Err::popHandler(); } catch (Except& ex) { delete Err::popHandler(); // errors now are fatal pgf->close(); delete pgf; error("%s", ex.what()); } delete pgf; return rho; } } affxparser/src/R_affx_constants.h0000644000175200017520000000115714516003651020203 0ustar00biocbuildbiocbuild#define R_AFFX_VERBOSE 1 #define R_AFFX_REALLY_VERBOSE 2 /* * Using R's test of endianness */ #include #ifdef WORDS_BIGENDIAN #define IS_BIG_ENDIAN 1 #endif #ifdef _MSC_VER #define _strtoui64 strtoull /* for TsvFile.cpp */ #include #define WINVER WindowsXP /* for Util.cpp, via TsvFile.cpp */ #endif /* Patch for compilation errors ("'int32_t' does not name a type") on Windows using the Rtools 2.15.0.1915-1919 toolchain, which is used by R (> 2.14.1), cf. private email on 'affxparser (devel) on Windows' on Jan 25 - Mar 5, 2012. */ #ifdef __MINGW32__ #include #endif affxparser/src/R_affx_test_bpmap_cmdline.cpp0000644000175200017520000000575314516003651022361 0ustar00biocbuildbiocbuild/******************************************** A command line parsing of cel/cdf files. Used for testing. *********************************************/ #include "BPMAPFileData.h" #include "FusionBPMAPData.h" #include using namespace std; using namespace affymetrix_fusion_io; int main(int argc, char **argv) { const char* bpmapFileName = argv[1]; CBPMAPFileData bpmap; bpmap.SetFileName(bpmapFileName); if(bpmap.ReadHeader() == false) { cout << "Failed to read the file." 
<< endl; return 0; } bpmap.Read(); cout << "Reading " << bpmap.GetFileName() << endl; cout << "Number of sequences: " << bpmap.GetNumberSequences() << endl; cout << "Version: " << bpmap.GetVersion() << endl; int numSeq = bpmap.GetNumberSequences(); affxbpmap::CGDACSequenceItem seq; for(int i = 0; i < numSeq ; i++) { bpmap.GetSequenceItem(i, seq); cout << "Sequence Name: " << seq.GetName() << endl; cout << "Sequence Group Name: " << seq.GroupName() << endl; cout << "Sequence Full Name: " << seq.FullName() << endl; cout << "Sequence Version: " << seq.GetSeqVersion() << endl; cout << "Sequence Probe Mapping: " << seq.GetProbeMapping() << endl; cout << "Sequence Number: " << seq.GetNumber() << endl; cout << "Sequence Number of Hits: " << seq.GetNumberHits() << endl; cout << "Sequence Number of Parameters: " << seq.GetNumberParameters() << endl; // seq.GetParameter int numPar = seq.GetNumberParameters(); TagValuePairType tagPar; for(int j = 0; j < numPar; j++) { tagPar = seq.GetParameter(j); cout << " Parameter Tag: " << tagPar.Tag << endl; cout << " Parameter Value: " << tagPar.Value << endl; } int numHits = seq.GetNumberHits(); affxbpmap::GDACSequenceHitItemType seqHit; for(int j = 0; j < min(numHits, 2) ; j++) { seq.GetHitItem(j, seqHit, true); cout << " Hit pmx: " << seqHit.PMX << endl; cout << " Hit pmy: " << seqHit.PMY << endl; cout << " Hit mmx: " << seqHit.MMX << endl; cout << " Hit mmy: " << seqHit.MMY << endl; cout << " Hit matchscore: " << seqHit.MatchScore << endl; cout << " Hit pmprobe seq: " << seqHit.PMProbe << endl; cout << " Hit pmprobe length: " << seqHit.ProbeLength + 0 << endl; cout << " Hit topstrand: " << seqHit.TopStrand + 0 << endl; // cout << " Hit pmprobe packed: " << seqHit.PackedPMProbe[0] << endl; // could not get pmprobepacked to work cout << " Hit genomic center position: " << seqHit.getCenterPosition() << endl; cout << " Hit genomic start position: " << seqHit.getStartPosition() << endl; } } bpmap.Close(); return 0; } 
affxparser/src/R_affx_test_cdfwriter_cmdline.cpp0000644000175200017520000000755514516003651023255 0ustar00biocbuildbiocbuild/******************************************** A command line writing of cel file. Used for testing. *********************************************/ #include #include #include #include #include "CDFFileWriter.h" using namespace std; using namespace affymetrix_calvin_io; void writeExpressionCDFFile(const std::string& filename, u_int32_t probeSetCnt) { wchar_t name[100]; u_int8_t unitType = 3; u_int8_t direction = 1; u_int32_t atoms = 11; u_int8_t cellsPerAtom = 2; u_int32_t cells = atoms*cellsPerAtom; CDFData data(filename); data.SetProbeSetCnt(probeSetCnt, Expression); data.SetArrayCols(atoms); data.SetArrayRows(probeSetCnt*cellsPerAtom); CDFFileWriter writer(data); for (u_int32_t i = 0; i < probeSetCnt; ++i) { // make the unit name // FormatString1(name, 100, L"biob_%d", i); writer.OpenDataGroup(L"tmp", 1); CDFProbeSetWriter* probeWriter = writer.CreateProbeSetWriter(name,unitType,direction,atoms,cells,i,cellsPerAtom); probeWriter->WriteHeader(); for (u_int32_t atom = 0; atom < atoms; ++atom) { for (u_int32_t cellInAtom = 0; cellInAtom < cellsPerAtom; ++cellInAtom) { probeWriter->Write(atom,i*cellsPerAtom+cellInAtom,atom,atom,'C','G'); } } probeWriter->Close(); delete probeWriter; writer.CloseDataGroup(); } } int main(int argc, char **argv) { // writeExpressionCDFFile("testExp.cdf", 5); const char* cdfFileName = argv[1]; u_int32_t nProbeSets= 2; u_int32_t nCols = 10, nRows = 10; CDFData data(cdfFileName); data.SetProbeSetCnt(nProbeSets, Expression); data.SetArrayRows(nRows); data.SetArrayCols(nCols); CDFFileWriter* writer = new CDFFileWriter(data); u_int8_t unitType, direction, cellsPerAtom; u_int32_t atoms, cells; CDFProbeSetWriter* probeWriter; // const wstring probeSetName, groupSetName; // probeSetName = "affy probe set 1"; writer->OpenDataGroup(L"affy probe set 1", 1); unitType = 3; direction = 1; atoms = 2; cellsPerAtom = 2; cells = 
atoms*cellsPerAtom; probeWriter = writer->CreateProbeSetWriter(L"xda block name 1", unitType, direction, atoms, cells, 0,cellsPerAtom); // (const std::wstring& xdaBlockName, probeWriter->WriteHeader(); probeWriter->Write(10,10,1,3,'C','A'); probeWriter->Write(10,11,1,3,'C','A'); probeWriter->Write(11,10,2,4,'C','A'); probeWriter->Write(11,11,2,4,'C','A'); // (u_int16_t xCoord, // u_int16_t yCoord, // u_int32_t atom, // u_int32_t indexPos, // int8_t baseProbe, // int8_t baseTarget); probeWriter->Close(); delete probeWriter; writer->CloseDataGroup(); writer->OpenDataGroup(L"affy probe set 2", 2); unitType = 3; direction = 1; atoms = 2; cellsPerAtom = 2; cells = atoms*cellsPerAtom; probeWriter = writer->CreateProbeSetWriter(L"xda block name 1", unitType, direction, atoms, cells, 0,cellsPerAtom); probeWriter->WriteHeader(); probeWriter->Write(12,12,1,3,'C','A'); probeWriter->Write(12,13,1,3,'C','A'); probeWriter->Write(13,13,2,4,'C','A'); probeWriter->Write(13,12,2,4,'C','A'); probeWriter->Close(); delete probeWriter; unitType = 3; direction = 1; atoms = 2; cellsPerAtom = 2; cells = atoms*cellsPerAtom; probeWriter = writer->CreateProbeSetWriter(L"xda block name 2", unitType, direction, atoms, cells, 0,cellsPerAtom); probeWriter->WriteHeader(); probeWriter->Write(12,12,1,3,'C','A'); probeWriter->Write(12,13,1,3,'C','A'); probeWriter->Write(13,13,2,4,'C','A'); probeWriter->Write(13,12,2,4,'C','A'); probeWriter->Close(); delete probeWriter; writer->CloseDataGroup(); delete writer; return 0; } affxparser/src/R_affx_test_celwriter_cmdline.cpp0000644000175200017520000001074114516003651023253 0ustar00biocbuildbiocbuild/******************************************** A command line writing of cel file. Used for testing. *********************************************/ #include #include #include #include #include "CalvinCelFileWriter.h" #include "CELData.h" using namespace std; using namespace affymetrix_calvin_io; // note calvin instead of fusion? 
// void transformCels(Options &o) { // CelReader cReader; // Object for getting data from cel files. // ChipLayout layout; // Specifies probesets, locations of features on chip. // ChipStream *cStream = NULL; // Our chipstream for transforming data. // vector words; // vector cStreamVec; // affxcel::CCELFileWriter cel; // ChipStreamFactory cFactory; // if(o.targetSketch != "") // cFactory.readTargetSketchFromFile(o.targetSketch.c_str()); // if(o.writeSketch) // cFactory.setWriteSketch(o.outDir + PATH_SEPARATOR); // if(o.probeNormFile != "") // cFactory.readNormProbesFromFile(o.probeNormFile.c_str()); // const char *cdfFile = o.cdfFile.c_str(); // const char *chipStreamStr = o.chipstream.c_str(); // const char *outDir = o.outDir.c_str(); // if(o.cdfFile != "") { // /* Get the layout for the chip. */ // cout << "Opening cdf file: " << cdfFile << endl; // layout.openCdfAll(cdfFile); // } // else { // int xCount, yCount; // cout << "Setting layout dimensions from cel file. Assuming all probes are PM." << endl; // getDimensions(o.celFiles[0], xCount, yCount); // layout.setDimensions(xCount, yCount); // vector mask(layout.getProbeCount(), true); // layout.setPmProbeMask(mask); // } // /* Create our chipstream objects. */ // Util::chopString(chipStreamStr, ',', words); // for(unsigned int i = 0; i < words.size(); i++) { // string dummy; // cStream = cFactory.chipStreamForString(words[i], layout, dummy ); // if(!cStreamVec.empty()) // cStreamVec[cStreamVec.size() - 1]->registerStream(cStream); // cStreamVec.push_back(cStream); // } // cReader.setFiles(o.celFiles); // /* Let reader know to pass data to our chipstream object. */ // cReader.registerStream(cStreamVec[0]); // cout << "Reading cel files." << endl; // /* Open, read, and send cel file data one by one to chipstream. */ // bool continueProcessing = true; // cReader.readFiles(&continueProcessing); // /* Transform the data and write it out. 
*/ // cout << "Adjusting and writing files"; // for(int i = 0; i < o.celFiles.size(); i++) { // cout.put('.'); // cout.flush(); // cel.SetFileName(o.celFiles[i].c_str()); // if(!cel.Read()) // Err::errAbort("Can't read cel file: " + cel.GetFileName()); // // the celfile might be mapped read-only -- make it writeable // cel.EnsureNotMmapped(); // int numCells = cel.GetNumCells(); // for(int celIx = 0; celIx < numCells; celIx++) { // float intensity = ChipStream::transformData(cel.GetIntensity(celIx), celIx, i, cStreamVec); // cel.SetIntensity(celIx, intensity); // } // string outCel = ToStr(outDir) + ToStr(PATH_SEPARATOR) + Util::fileRoot(cel.GetFileName()); // cel.SetFileName(outCel.c_str()); // if(!writeCelFile(cel, o.celFormat)) // cout << "Warning could not save file: " << outCel << endl; // cel.Close(); // } // cout << "Done." << endl; // } int main(int argc, char **argv) { const char* celFileName = argv[1]; CelFileData writerData(celFileName); writerData.SetIntensityCount(10); writerData.SetStdDevCount(10); writerData.SetPixelCount(10); writerData.SetOutlierCount(10); writerData.SetMaskCount(10); writerData.SetArrayType(L"arraytype"); writerData.SetMasterFileName(L"masterfile"); writerData.SetLibraryPackageName(L"libpackage"); // look in data/src/CELData.h // SetAlgorithmName // SetRows // SetCols // Stdev, numPixels, outliers and masked are optional // AddAlgorithmParameter CelFileWriter writer(writerData); FloatVector intensities; intensities.push_back(10.0); intensities.push_back(11.0); intensities.push_back(12.0); intensities.push_back(13.0); writer.WriteIntensities(intensities); FloatVector stdDevs; stdDevs.push_back(10.0); stdDevs.push_back(11.0); stdDevs.push_back(12.0); stdDevs.push_back(13.0); writer.WriteStdDevs(stdDevs); Int16Vector pixels; pixels.push_back(10); pixels.push_back(11); pixels.push_back(12); pixels.push_back(13); writer.WritePixels(pixels); return 0; } 
affxparser/src/R_affx_test_cmd_line.cpp0000644000175200017520000000566514516003651021343 0ustar00biocbuildbiocbuild/******************************************** A command line parsing of cel/cdf files. Used for testing. *********************************************/ #include "FusionCELData.h" #include "FusionCDFData.h" #include using namespace std; using namespace affymetrix_fusion_io; int main(int argc, char **argv) { const char* celFileName = argv[1]; const char* cdfFileName = argv[2]; FusionCELData cel; FusionCDFData cdf; try { cel.SetFileName(celFileName); if (cel.Read() == false) { cout << "Failed to read the file." << endl; return 0; } int n = (int) cel.GetNumCells(); cout << "Only printing first 10 lines in CDF file." << endl; for (int i = 0; i < 10; i++) { cout << "intensity: " << cel.GetIntensity(i) << " x: " << cel.IndexToX(i) << " y: " << cel.IndexToY(i) << " pixels: " << cel.GetPixels(i) << " stdv: " << cel.GetStdv(i) << endl; if (cel.IsOutlier(i) == true) { cout << "Outlier at index: " << i << endl; } if (cel.IsMasked(i) == true) { cout << "Masked at index: " << i << endl; } } if(argc > 2){ cdf.SetFileName(cdfFileName); if (cdf.Read() == false) { cout << "Failed to read the CDF file." 
<< endl; return 0; } int nsets = cdf.GetHeader().GetNumProbeSets(); std::string name; for (int iset=0; iset= 4.9.3): introduced in R (>= 3.3.0) - gcc (>= 4.6.3): R (< 3.3.0) and some R (>= 3.3.0) installs Henrik Bengtsson, 2016-04-05 *****************************************************************/ #define GCC_VERSION (__GNUC__ * 10000 \ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) #if GCC_VERSION >= 120200 #include "_mingw_gcc1202.h" #elif GCC_VERSION >= 100200 #include "_mingw_gcc1002.h" #elif GCC_VERSION >= 40903 #include "_mingw_gcc493.h" #elif GCC_VERSION >= 40603 #include "_mingw_gcc463.h" #endif affxparser/src/_mingw_gcc1002.h0000644000175200017520000004122114516003651017275 0ustar00biocbuildbiocbuild/* This is a copy of _mingw.h from MinGW headers (the toolchain installation, should match the toolchain used. Modified/hacked to provide __uuidof() operator - see HACK below. There should be a cleaner solution, but this is what has been done before. */ /** * This file has no copyright assigned and is placed in the Public Domain. * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. */ #ifndef _INC__MINGW_H #define _INC__MINGW_H #include "_mingw_mac.h" #include "_mingw_secapi.h" /* Include _cygwin.h if we're building a Cygwin application. */ #ifdef __CYGWIN__ #include "_cygwin.h" #endif /* Target specific macro replacement for type "long". In the Windows API, the type long is always 32 bit, even if the target is 64 bit (LLP64). On 64 bit Cygwin, the type long is 64 bit (LP64). So, to get the right sized definitions and declarations, all usage of type long in the Windows headers have to be replaced by the below defined macro __LONG32. */ #ifndef __LP64__ /* 32 bit target, 64 bit Mingw target */ #define __LONG32 long #else /* 64 bit Cygwin target */ #define __LONG32 int #endif /* C/C++ specific language defines. 
*/ #ifdef _WIN64 #ifdef __stdcall #undef __stdcall #endif #define __stdcall #endif #ifndef __GNUC__ # ifndef __MINGW_IMPORT # define __MINGW_IMPORT __declspec(dllimport) # endif # ifndef _CRTIMP # define _CRTIMP __declspec(dllimport) # endif # define __DECLSPEC_SUPPORTED # define __attribute__(x) /* nothing */ #else /* __GNUC__ */ # ifdef __declspec # ifndef __MINGW_IMPORT /* Note the extern. This is needed to work around GCC's limitations in handling dllimport attribute. */ # define __MINGW_IMPORT extern __attribute__ ((__dllimport__)) # endif # ifndef _CRTIMP # undef __USE_CRTIMP # if !defined (_CRTBLD) && !defined (_SYSCRT) # define __USE_CRTIMP 1 # endif # ifdef __USE_CRTIMP # define _CRTIMP __attribute__ ((__dllimport__)) # else # define _CRTIMP # endif # endif # define __DECLSPEC_SUPPORTED # else /* __declspec */ # undef __DECLSPEC_SUPPORTED # undef __MINGW_IMPORT # ifndef _CRTIMP # define _CRTIMP # endif # endif /* __declspec */ #endif /* __GNUC__ */ /* HACK: to get __uuidof() */ #ifdef _MSC_VER #define USE___UUIDOF 0 #else #define USE___UUIDOF 0 #endif #if !defined(_MSC_VER) && !defined(_inline) #define _inline __inline #endif #ifdef __cplusplus # define __CRT_INLINE inline #elif defined(_MSC_VER) # define __CRT_INLINE __inline #else # if ((__MINGW_GNUC_PREREQ(4, 3) || defined(__clang__)) && __STDC_VERSION__ >= 199901L) # define __CRT_INLINE extern inline __attribute__((__gnu_inline__)) # else # define __CRT_INLINE extern __inline__ # endif #endif #if !defined(__MINGW_INTRIN_INLINE) && defined(__GNUC__) #define __MINGW_INTRIN_INLINE extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif #ifndef __CYGWIN__ #ifdef __NO_INLINE__ #undef __CRT__NO_INLINE #define __CRT__NO_INLINE 1 #endif #endif #ifdef __cplusplus # define __UNUSED_PARAM(x) #else # ifdef __GNUC__ # define __UNUSED_PARAM(x) x __attribute__ ((__unused__)) # else # define __UNUSED_PARAM(x) x # endif #endif #ifndef __GNUC__ # ifdef _MSC_VER # define __restrict__ __restrict # else 
# define __restrict__ /* nothing */ # endif #endif /* !__GNUC__ */ #if __MINGW_GNUC_PREREQ (3,1) && !defined __GNUG__ # define __restrict_arr __restrict #elif defined(_MSC_VER) # define __restrict_arr __restrict #else # ifdef __GNUC__ # define __restrict_arr /* Not supported in old GCC. */ # else # if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L # define __restrict_arr restrict # else # define __restrict_arr /* Not supported. */ # endif # endif #endif #ifdef __GNUC__ #define __MINGW_ATTRIB_NORETURN __attribute__ ((__noreturn__)) #define __MINGW_ATTRIB_CONST __attribute__ ((__const__)) #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_NORETURN __declspec(noreturn) #define __MINGW_ATTRIB_CONST #else #define __MINGW_ATTRIB_NORETURN #define __MINGW_ATTRIB_CONST #endif #if __MINGW_GNUC_PREREQ (3, 0) #define __MINGW_ATTRIB_MALLOC __attribute__ ((__malloc__)) #define __MINGW_ATTRIB_PURE __attribute__ ((__pure__)) #elif __MINGW_MSC_PREREQ(14, 0) #define __MINGW_ATTRIB_MALLOC __declspec(noalias) __declspec(restrict) #define __MINGW_ATTRIB_PURE #else #define __MINGW_ATTRIB_MALLOC #define __MINGW_ATTRIB_PURE #endif /* Attribute `nonnull' was valid as of gcc 3.3. We don't use GCC's variadiac macro facility, because variadic macros cause syntax errors with --traditional-cpp. 
*/ #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_ATTRIB_NONNULL(arg) __attribute__ ((__nonnull__ (arg))) #else #define __MINGW_ATTRIB_NONNULL(arg) #endif /* GNUC >= 3.3 */ #ifdef __GNUC__ #define __MINGW_ATTRIB_UNUSED __attribute__ ((__unused__)) #else #define __MINGW_ATTRIB_UNUSED #endif /* ATTRIBUTE_UNUSED */ #if __MINGW_GNUC_PREREQ (3, 1) #define __MINGW_ATTRIB_USED __attribute__ ((__used__)) #define __MINGW_ATTRIB_DEPRECATED __attribute__ ((__deprecated__)) #if __MINGW_GNUC_PREREQ (4, 5) || defined (__clang__) #define __MINGW_ATTRIB_DEPRECATED_MSG(x) __attribute__ ((__deprecated__(x))) #endif #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_USED #define __MINGW_ATTRIB_DEPRECATED __declspec(deprecated) #else #define __MINGW_ATTRIB_USED __MINGW_ATTRIB_UNUSED #define __MINGW_ATTRIB_DEPRECATED #endif /* GNUC >= 3.1 */ #ifndef __MINGW_ATTRIB_DEPRECATED_MSG #define __MINGW_ATTRIB_DEPRECATED_MSG(x) __MINGW_ATTRIB_DEPRECATED #endif #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_NOTHROW __attribute__ ((__nothrow__)) #elif __MINGW_MSC_PREREQ(12, 0) && defined (__cplusplus) #define __MINGW_NOTHROW __declspec(nothrow) #else #define __MINGW_NOTHROW #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_ATTRIB_NO_OPTIMIZE __attribute__((__optimize__ ("0"))) #else #define __MINGW_ATTRIB_NO_OPTIMIZE #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_PRAGMA_PARAM(x) _Pragma (#x) #elif __MINGW_MSC_PREREQ (13, 1) #define __MINGW_PRAGMA_PARAM(x) __pragma (x) #else #define __MINGW_PRAGMA_PARAM(x) #endif #define __MINGW_BROKEN_INTERFACE(x) \ __MINGW_PRAGMA_PARAM(message ("Interface " _CRT_STRINGIZE(x) \ " has unverified layout.")) #ifndef __MSVCRT_VERSION__ /* High byte is the major version, low byte is the minor. 
*/ # ifndef _UCRT # define __MSVCRT_VERSION__ 0xE00 # else # define __MSVCRT_VERSION__ 0xE00 # endif #endif #ifndef _WIN32_WINNT #define _WIN32_WINNT 0x502 #endif #ifndef _INT128_DEFINED #define _INT128_DEFINED #ifdef __GNUC__ #define __int8 char #define __int16 short #define __int32 int #define __int64 long long #ifdef _WIN64 #if (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 1)) && \ !defined(__SIZEOF_INT128__) /* clang >= 3.1 has __int128 but no size macro */ #define __SIZEOF_INT128__ 16 #endif #ifndef __SIZEOF_INT128__ typedef int __int128 __attribute__ ((__mode__ (TI))); #endif #endif #endif /* __GNUC__ */ #endif /* _INT128_DEFINED */ #ifdef __GNUC__ #define __ptr32 #define __ptr64 #ifndef __unaligned #define __unaligned #endif #ifndef __w64 #define __w64 #endif #ifdef __cplusplus #define __forceinline inline __attribute__((__always_inline__)) #else #define __forceinline extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif /* __cplusplus */ #endif /* __GNUC__ */ #if !defined(_WIN32) && !defined(__CYGWIN__) #error Only Win32 target is supported! 
#endif #ifndef __nothrow #ifdef __cplusplus #define __nothrow __MINGW_NOTHROW #else #define __nothrow #endif #endif /* __nothrow */ #include /* other headers depend on this include */ #ifndef _CRT_STRINGIZE #define __CRT_STRINGIZE(_Value) #_Value #define _CRT_STRINGIZE(_Value) __CRT_STRINGIZE(_Value) #endif /* _CRT_STRINGIZE */ #ifndef _CRT_WIDE #define __CRT_WIDE(_String) L ## _String #define _CRT_WIDE(_String) __CRT_WIDE(_String) #endif /* _CRT_WIDE */ #ifndef _W64 #define _W64 #endif #ifndef _CRTIMP_NOIA64 #ifdef __ia64__ #define _CRTIMP_NOIA64 #else #define _CRTIMP_NOIA64 _CRTIMP #endif #endif /* _CRTIMP_NOIA64 */ #ifndef _CRTIMP2 #define _CRTIMP2 _CRTIMP #endif #ifndef _CRTIMP_ALTERNATIVE #define _CRTIMP_ALTERNATIVE _CRTIMP #define _CRT_ALTERNATIVE_IMPORTED #endif /* _CRTIMP_ALTERNATIVE */ #ifndef _MRTIMP2 #define _MRTIMP2 _CRTIMP #endif /* We have to define _DLL for gcc based mingw version. This define is set by VC, when DLL-based runtime is used. So, gcc based runtime just have DLL-base runtime, therefore this define has to be set. As our headers are possibly used by windows compiler having a static C-runtime, we make this definition gnu compiler specific here. 
*/ #if !defined (_DLL) && defined (__GNUC__) #define _DLL #endif #ifndef _MT #define _MT #endif #ifndef _MCRTIMP #define _MCRTIMP _CRTIMP #endif #ifndef _CRTIMP_PURE #define _CRTIMP_PURE _CRTIMP #endif #ifndef _PGLOBAL #define _PGLOBAL #endif #ifndef _AGLOBAL #define _AGLOBAL #endif #define _SECURECRT_FILL_BUFFER_PATTERN 0xFD #define _CRT_DEPRECATE_TEXT(_Text) __declspec(deprecated) #ifndef _CRT_INSECURE_DEPRECATE_MEMORY #define _CRT_INSECURE_DEPRECATE_MEMORY(_Replacement) #endif #ifndef _CRT_INSECURE_DEPRECATE_GLOBALS #define _CRT_INSECURE_DEPRECATE_GLOBALS(_Replacement) #endif #ifndef _CRT_MANAGED_HEAP_DEPRECATE #define _CRT_MANAGED_HEAP_DEPRECATE #endif #ifndef _CRT_OBSOLETE #define _CRT_OBSOLETE(_NewItem) #endif #ifndef __WIDL__ #if defined (_WIN32) && !defined (_WIN64) && !defined (__MINGW_USE_VC2005_COMPAT) #ifndef _USE_32BIT_TIME_T #define _USE_32BIT_TIME_T #endif #endif #ifndef _CONST_RETURN #define _CONST_RETURN #endif #ifndef UNALIGNED #if defined(_M_IA64) || defined(_M_AMD64) #define UNALIGNED __unaligned #else #define UNALIGNED #endif #endif /* UNALIGNED */ #ifndef _CRT_ALIGN #ifdef _MSC_VER #define _CRT_ALIGN(x) __declspec(align(x)) #else /* __GNUC__ */ #define _CRT_ALIGN(x) __attribute__ ((__aligned__ (x))) #endif #endif /* _CRT_ALIGN */ #endif /* __WIDL__ */ #ifndef __CRTDECL #define __CRTDECL __cdecl #endif #define _ARGMAX 100 #ifndef _TRUNCATE #define _TRUNCATE ((size_t)-1) #endif #ifndef _CRT_UNUSED #define _CRT_UNUSED(x) (void)x #endif /* MSVC defines _NATIVE_NULLPTR_SUPPORTED when nullptr is supported. We emulate it here for GCC. */ #if __MINGW_GNUC_PREREQ(4, 6) #if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) #define _NATIVE_NULLPTR_SUPPORTED #endif #endif /* We are activating __USE_MINGW_ANSI_STDIO for various define indicators. * printf ll modifier (unsupported by msvcrt.dll) is required by C99 and C++11 standards. 
*/ #if (defined (_POSIX) || defined (_POSIX_SOURCE) || defined (_POSIX_C_SOURCE) \ || defined (_ISOC99_SOURCE) \ || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L && __MSVCRT_VERSION__ < 0xE00) \ || (defined (__cplusplus) && __cplusplus >= 201103L && __MSVCRT_VERSION__ < 0xE00) \ || defined (_XOPEN_SOURCE) || defined (_XOPEN_SOURCE_EXTENDED) \ || defined (_GNU_SOURCE) \ || defined (_SVID_SOURCE)) \ && !defined(__USE_MINGW_ANSI_STDIO) /* Enable __USE_MINGW_ANSI_STDIO if user did _not_ specify it explicitly... */ # define __USE_MINGW_ANSI_STDIO 1 #endif /* We are defining __USE_MINGW_ANSI_STDIO as 0 or 1 */ #if !defined(__USE_MINGW_ANSI_STDIO) #define __USE_MINGW_ANSI_STDIO 0 /* was not defined so it should be 0 */ #elif (__USE_MINGW_ANSI_STDIO + 0) != 0 || (1 - __USE_MINGW_ANSI_STDIO - 1) == 2 #define __USE_MINGW_ANSI_STDIO 1 /* was defined as nonzero or empty so it should be 1 */ #else #define __USE_MINGW_ANSI_STDIO 0 /* was defined as (int)zero and non-empty so it should be 0 */ #endif /* _dowildcard is an int that controls the globbing of the command line. * The MinGW32 (mingw.org) runtime calls it _CRT_glob, so we are adding * a compatibility definition here: you can use either of _CRT_glob or * _dowildcard . * If _dowildcard is non-zero, the command line will be globbed: *.* * will be expanded to be all files in the startup directory. * In the mingw-w64 library a _dowildcard variable is defined as being * 0, therefore command line globbing is DISABLED by default. 
To turn it * on and to leave wildcard command line processing MS's globbing code, * include a line in one of your source modules defining _dowildcard and * setting it to -1, like so: * int _dowildcard = -1; */ #undef _CRT_glob #define _CRT_glob _dowildcard #if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSSTRUCT) && \ !defined(NONAMELESSUNION) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSUNION) && \ !defined(NONAMELESSSTRUCT) #define NONAMELESSSTRUCT 1 #endif #ifndef __ANONYMOUS_DEFINED #define __ANONYMOUS_DEFINED #define _ANONYMOUS_UNION __MINGW_EXTENSION #define _ANONYMOUS_STRUCT __MINGW_EXTENSION #ifndef NONAMELESSUNION #define _UNION_NAME(x) #define _STRUCT_NAME(x) #else /* NONAMELESSUNION */ #define _UNION_NAME(x) x #define _STRUCT_NAME(x) x #endif #endif /* __ANONYMOUS_DEFINED */ #ifndef DUMMYUNIONNAME # ifdef NONAMELESSUNION # define DUMMYUNIONNAME u # define DUMMYUNIONNAME1 u1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 u2 # define DUMMYUNIONNAME3 u3 # define DUMMYUNIONNAME4 u4 # define DUMMYUNIONNAME5 u5 # define DUMMYUNIONNAME6 u6 # define DUMMYUNIONNAME7 u7 # define DUMMYUNIONNAME8 u8 # define DUMMYUNIONNAME9 u9 # else /* NONAMELESSUNION */ # define DUMMYUNIONNAME # define DUMMYUNIONNAME1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 # define DUMMYUNIONNAME3 # define DUMMYUNIONNAME4 # define DUMMYUNIONNAME5 # define DUMMYUNIONNAME6 # define DUMMYUNIONNAME7 # define DUMMYUNIONNAME8 # define DUMMYUNIONNAME9 # endif #endif /* DUMMYUNIONNAME */ #ifndef DUMMYSTRUCTNAME # ifdef NONAMELESSUNION # define DUMMYSTRUCTNAME s # define DUMMYSTRUCTNAME1 s1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 s2 # define DUMMYSTRUCTNAME3 s3 # define DUMMYSTRUCTNAME4 s4 # define DUMMYSTRUCTNAME5 s5 # else # define DUMMYSTRUCTNAME # define DUMMYSTRUCTNAME1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 # define DUMMYSTRUCTNAME3 # define DUMMYSTRUCTNAME4 # define DUMMYSTRUCTNAME5 
# endif #endif /* DUMMYSTRUCTNAME */ /* Macros for __uuidof template-based emulation */ #if defined(__cplusplus) && (USE___UUIDOF == 0) #if __cpp_constexpr >= 200704l && __cpp_inline_variables >= 201606L #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \ extern "C++" { \ template<> struct __mingw_uuidof_s { \ static constexpr IID __uuid_inst = { \ l,w1,w2, {b1,b2,b3,b4,b5,b6,b7,b8} \ }; \ }; \ template<> constexpr const GUID &__mingw_uuidof() { \ return __mingw_uuidof_s::__uuid_inst; \ } \ template<> constexpr const GUID &__mingw_uuidof() { \ return __mingw_uuidof_s::__uuid_inst; \ } \ } #else #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \ extern "C++" { \ template<> inline const GUID &__mingw_uuidof() { \ static const IID __uuid_inst = {l,w1,w2, {b1,b2,b3,b4,b5,b6,b7,b8}}; \ return __uuid_inst; \ } \ template<> inline const GUID &__mingw_uuidof() { \ return __mingw_uuidof(); \ } \ } #endif #define __uuidof(type) __mingw_uuidof<__typeof(type)>() #else #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) #endif #ifdef __cplusplus extern "C" { #endif #ifdef __MINGW_INTRIN_INLINE #ifdef __has_builtin #define __MINGW_DEBUGBREAK_IMPL !__has_builtin(__debugbreak) #else #define __MINGW_DEBUGBREAK_IMPL 1 #endif #if __MINGW_DEBUGBREAK_IMPL == 1 void __cdecl __debugbreak(void); __MINGW_INTRIN_INLINE void __cdecl __debugbreak(void) { __asm__ __volatile__("int {$}3":); } #endif #endif /* mingw-w64 specific functions: */ const char *__mingw_get_crt_info (void); #ifdef __cplusplus } #endif #endif /* _INC__MINGW_H */ #ifndef MINGW_SDK_INIT #define MINGW_SDK_INIT /* for backward compatibility */ #ifndef MINGW_HAS_SECURE_API #define MINGW_HAS_SECURE_API 1 #endif #define __STDC_SECURE_LIB__ 200411L #define __GOT_SECURE_LIB__ __STDC_SECURE_LIB__ #ifndef __WIDL__ #include "sdks/_mingw_ddk.h" #endif #endif /* MINGW_SDK_INIT */ affxparser/src/_mingw_gcc1202.h0000644000175200017520000004215114516003651017302 0ustar00biocbuildbiocbuild/* This is a 
copy of _mingw.h from MinGW headers (the toolchain installation, should match the toolchain used. Modified/hacked to provide __uuidof() operator - see HACK below. There should be a cleaner solution, but this is what has been done before. */ /** * This file has no copyright assigned and is placed in the Public Domain. * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. */ #ifndef _INC__MINGW_H #define _INC__MINGW_H #include "_mingw_mac.h" #include "_mingw_secapi.h" /* Include _cygwin.h if we're building a Cygwin application. */ #ifdef __CYGWIN__ #include "_cygwin.h" #endif /* Target specific macro replacement for type "long". In the Windows API, the type long is always 32 bit, even if the target is 64 bit (LLP64). On 64 bit Cygwin, the type long is 64 bit (LP64). So, to get the right sized definitions and declarations, all usage of type long in the Windows headers have to be replaced by the below defined macro __LONG32. */ #ifndef __LP64__ /* 32 bit target, 64 bit Mingw target */ #define __LONG32 long #else /* 64 bit Cygwin target */ #define __LONG32 int #endif /* C/C++ specific language defines. */ #ifdef _WIN64 #ifdef __stdcall #undef __stdcall #endif #define __stdcall #endif #ifndef __GNUC__ # ifndef __MINGW_IMPORT # define __MINGW_IMPORT __declspec(dllimport) # endif # ifndef _CRTIMP # define _CRTIMP __declspec(dllimport) # endif # define __DECLSPEC_SUPPORTED # define __attribute__(x) /* nothing */ #else /* __GNUC__ */ # ifdef __declspec # ifndef __MINGW_IMPORT /* Note the extern. This is needed to work around GCC's limitations in handling dllimport attribute. 
*/ # define __MINGW_IMPORT extern __attribute__ ((__dllimport__)) # endif # ifndef _CRTIMP # undef __USE_CRTIMP # if !defined (_CRTBLD) && !defined (_SYSCRT) # define __USE_CRTIMP 1 # endif # ifdef __USE_CRTIMP # define _CRTIMP __attribute__ ((__dllimport__)) # else # define _CRTIMP # endif # endif # define __DECLSPEC_SUPPORTED # else /* __declspec */ # undef __DECLSPEC_SUPPORTED # undef __MINGW_IMPORT # ifndef _CRTIMP # define _CRTIMP # endif # endif /* __declspec */ #endif /* __GNUC__ */ #ifdef _MSC_VER #define USE___UUIDOF 0 #else #define USE___UUIDOF 0 #endif #if !defined(_MSC_VER) && !defined(_inline) #define _inline __inline #endif #ifdef __cplusplus # define __CRT_INLINE inline #elif defined(_MSC_VER) # define __CRT_INLINE __inline #else # if ((__MINGW_GNUC_PREREQ(4, 3) || defined(__clang__)) && __STDC_VERSION__ >= 199901L) # define __CRT_INLINE extern inline __attribute__((__gnu_inline__)) # else # define __CRT_INLINE extern __inline__ # endif #endif #if !defined(__MINGW_INTRIN_INLINE) && defined(__GNUC__) #define __MINGW_INTRIN_INLINE extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif #ifndef __CYGWIN__ #ifdef __NO_INLINE__ #undef __CRT__NO_INLINE #define __CRT__NO_INLINE 1 #endif #endif #ifdef __cplusplus # define __UNUSED_PARAM(x) #else # ifdef __GNUC__ # define __UNUSED_PARAM(x) x __attribute__ ((__unused__)) # else # define __UNUSED_PARAM(x) x # endif #endif #ifndef __GNUC__ # ifdef _MSC_VER # define __restrict__ __restrict # else # define __restrict__ /* nothing */ # endif #endif /* !__GNUC__ */ #if __MINGW_GNUC_PREREQ (3,1) && !defined __GNUG__ # define __restrict_arr __restrict #elif defined(_MSC_VER) # define __restrict_arr __restrict #else # ifdef __GNUC__ # define __restrict_arr /* Not supported in old GCC. */ # else # if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L # define __restrict_arr restrict # else # define __restrict_arr /* Not supported. 
*/ # endif # endif #endif #ifdef __GNUC__ #define __MINGW_ATTRIB_NORETURN __attribute__ ((__noreturn__)) #define __MINGW_ATTRIB_CONST __attribute__ ((__const__)) #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_NORETURN __declspec(noreturn) #define __MINGW_ATTRIB_CONST #else #define __MINGW_ATTRIB_NORETURN #define __MINGW_ATTRIB_CONST #endif #if __MINGW_GNUC_PREREQ (3, 0) #define __MINGW_ATTRIB_MALLOC __attribute__ ((__malloc__)) #define __MINGW_ATTRIB_PURE __attribute__ ((__pure__)) #elif __MINGW_MSC_PREREQ(14, 0) #define __MINGW_ATTRIB_MALLOC __declspec(noalias) __declspec(restrict) #define __MINGW_ATTRIB_PURE #else #define __MINGW_ATTRIB_MALLOC #define __MINGW_ATTRIB_PURE #endif /* Attribute `nonnull' was valid as of gcc 3.3. We don't use GCC's variadiac macro facility, because variadic macros cause syntax errors with --traditional-cpp. */ #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_ATTRIB_NONNULL(arg) __attribute__ ((__nonnull__ (arg))) #else #define __MINGW_ATTRIB_NONNULL(arg) #endif /* GNUC >= 3.3 */ #ifdef __GNUC__ #define __MINGW_ATTRIB_UNUSED __attribute__ ((__unused__)) #else #define __MINGW_ATTRIB_UNUSED #endif /* ATTRIBUTE_UNUSED */ #if __MINGW_GNUC_PREREQ (3, 1) #define __MINGW_ATTRIB_USED __attribute__ ((__used__)) #define __MINGW_ATTRIB_DEPRECATED __attribute__ ((__deprecated__)) #if __MINGW_GNUC_PREREQ (4, 5) || defined (__clang__) #define __MINGW_ATTRIB_DEPRECATED_MSG(x) __attribute__ ((__deprecated__(x))) #endif #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_USED #define __MINGW_ATTRIB_DEPRECATED __declspec(deprecated) #else #define __MINGW_ATTRIB_USED __MINGW_ATTRIB_UNUSED #define __MINGW_ATTRIB_DEPRECATED #endif /* GNUC >= 3.1 */ #ifndef __MINGW_ATTRIB_DEPRECATED_MSG #define __MINGW_ATTRIB_DEPRECATED_MSG(x) __MINGW_ATTRIB_DEPRECATED #endif #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_NOTHROW __attribute__ ((__nothrow__)) #elif __MINGW_MSC_PREREQ(12, 0) && defined (__cplusplus) #define __MINGW_NOTHROW __declspec(nothrow) #else 
#define __MINGW_NOTHROW #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_ATTRIB_NO_OPTIMIZE __attribute__((__optimize__ ("0"))) #else #define __MINGW_ATTRIB_NO_OPTIMIZE #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_PRAGMA_PARAM(x) _Pragma (#x) #elif __MINGW_MSC_PREREQ (13, 1) #define __MINGW_PRAGMA_PARAM(x) __pragma (x) #else #define __MINGW_PRAGMA_PARAM(x) #endif #define __MINGW_BROKEN_INTERFACE(x) \ __MINGW_PRAGMA_PARAM(message ("Interface " _CRT_STRINGIZE(x) \ " has unverified layout.")) #ifndef __MSVCRT_VERSION__ /* High byte is the major version, low byte is the minor. */ # if defined(__CRTDLL__) # define __MSVCRT_VERSION__ 0x00 # elif defined(_UCRT) # define __MSVCRT_VERSION__ 0xE00 # else # define __MSVCRT_VERSION__ 0xE00 # endif #endif #if !defined(_UCRT) && ((__MSVCRT_VERSION__ >= 0x1400) || (__MSVCRT_VERSION__ >= 0xE00 && __MSVCRT_VERSION__ < 0x1000)) /* Allow both 0x1400 and 0xE00 to identify UCRT */ #define _UCRT #endif #ifndef _WIN32_WINNT #define _WIN32_WINNT 0xa00 #endif #ifndef _INT128_DEFINED #define _INT128_DEFINED #ifdef __GNUC__ #define __int8 char #define __int16 short #define __int32 int #define __int64 long long #ifdef _WIN64 #if (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 1)) && \ !defined(__SIZEOF_INT128__) /* clang >= 3.1 has __int128 but no size macro */ #define __SIZEOF_INT128__ 16 #endif #ifndef __SIZEOF_INT128__ typedef int __int128 __attribute__ ((__mode__ (TI))); #endif #endif #endif /* __GNUC__ */ #endif /* _INT128_DEFINED */ #ifdef __GNUC__ #define __ptr32 #define __ptr64 #ifndef __unaligned #define __unaligned #endif #ifndef __w64 #define __w64 #endif #ifdef __cplusplus #define __forceinline inline __attribute__((__always_inline__)) #else #define __forceinline extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif /* __cplusplus */ #endif /* __GNUC__ */ #if !defined(_WIN32) && !defined(__CYGWIN__) #error Only Win32 target is supported! 
#endif #ifndef __nothrow #ifdef __cplusplus #define __nothrow __MINGW_NOTHROW #else #define __nothrow #endif #endif /* __nothrow */ #include /* other headers depend on this include */ #ifndef _CRT_STRINGIZE #define __CRT_STRINGIZE(_Value) #_Value #define _CRT_STRINGIZE(_Value) __CRT_STRINGIZE(_Value) #endif /* _CRT_STRINGIZE */ #ifndef _CRT_WIDE #define __CRT_WIDE(_String) L ## _String #define _CRT_WIDE(_String) __CRT_WIDE(_String) #endif /* _CRT_WIDE */ #ifndef _W64 #define _W64 #endif #ifndef _CRTIMP_NOIA64 #ifdef __ia64__ #define _CRTIMP_NOIA64 #else #define _CRTIMP_NOIA64 _CRTIMP #endif #endif /* _CRTIMP_NOIA64 */ #ifndef _CRTIMP2 #define _CRTIMP2 _CRTIMP #endif #ifndef _CRTIMP_ALTERNATIVE #define _CRTIMP_ALTERNATIVE _CRTIMP #define _CRT_ALTERNATIVE_IMPORTED #endif /* _CRTIMP_ALTERNATIVE */ #ifndef _MRTIMP2 #define _MRTIMP2 _CRTIMP #endif /* We have to define _DLL for gcc based mingw version. This define is set by VC, when DLL-based runtime is used. So, gcc based runtime just have DLL-base runtime, therefore this define has to be set. As our headers are possibly used by windows compiler having a static C-runtime, we make this definition gnu compiler specific here. 
*/ #if !defined (_DLL) && defined (__GNUC__) #define _DLL #endif #ifndef _MT #define _MT #endif #ifndef _MCRTIMP #define _MCRTIMP _CRTIMP #endif #ifndef _CRTIMP_PURE #define _CRTIMP_PURE _CRTIMP #endif #ifndef _PGLOBAL #define _PGLOBAL #endif #ifndef _AGLOBAL #define _AGLOBAL #endif #define _SECURECRT_FILL_BUFFER_PATTERN 0xFD #define _CRT_DEPRECATE_TEXT(_Text) __declspec(deprecated) #ifndef _CRT_INSECURE_DEPRECATE_MEMORY #define _CRT_INSECURE_DEPRECATE_MEMORY(_Replacement) #endif #ifndef _CRT_INSECURE_DEPRECATE_GLOBALS #define _CRT_INSECURE_DEPRECATE_GLOBALS(_Replacement) #endif #ifndef _CRT_MANAGED_HEAP_DEPRECATE #define _CRT_MANAGED_HEAP_DEPRECATE #endif #ifndef _CRT_OBSOLETE #define _CRT_OBSOLETE(_NewItem) #endif #ifndef __WIDL__ #if defined (_WIN32) && !defined (_WIN64) && !defined (__MINGW_USE_VC2005_COMPAT) && !defined (_UCRT) #ifndef _USE_32BIT_TIME_T #define _USE_32BIT_TIME_T #endif #endif #ifndef _CONST_RETURN #define _CONST_RETURN #endif #ifndef UNALIGNED #if defined(__ia64__) || defined(__x86_64__) #define UNALIGNED __unaligned #else #define UNALIGNED #endif #endif /* UNALIGNED */ #ifndef _CRT_ALIGN #ifdef _MSC_VER #define _CRT_ALIGN(x) __declspec(align(x)) #else /* __GNUC__ */ #define _CRT_ALIGN(x) __attribute__ ((__aligned__ (x))) #endif #endif /* _CRT_ALIGN */ #endif /* __WIDL__ */ #ifndef __CRTDECL #define __CRTDECL __cdecl #endif #define _ARGMAX 100 #ifndef _TRUNCATE #define _TRUNCATE ((size_t)-1) #endif #ifndef _CRT_UNUSED #define _CRT_UNUSED(x) (void)x #endif /* MSVC defines _NATIVE_NULLPTR_SUPPORTED when nullptr is supported. We emulate it here for GCC. */ #if __MINGW_GNUC_PREREQ(4, 6) #if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) #define _NATIVE_NULLPTR_SUPPORTED #endif #endif /* We are activating __USE_MINGW_ANSI_STDIO for various define indicators. * printf ll modifier (unsupported by msvcrt.dll) is required by C99 and C++11 standards. 
*/ #if (defined (_POSIX) || defined (_POSIX_SOURCE) || defined (_POSIX_C_SOURCE) \ || defined (_ISOC99_SOURCE) \ || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L && __MSVCRT_VERSION__ < 0xE00) \ || (defined (__cplusplus) && __cplusplus >= 201103L && __MSVCRT_VERSION__ < 0xE00) \ || defined (_XOPEN_SOURCE) || defined (_XOPEN_SOURCE_EXTENDED) \ || defined (_GNU_SOURCE) \ || defined (_SVID_SOURCE)) \ && !defined(__USE_MINGW_ANSI_STDIO) /* Enable __USE_MINGW_ANSI_STDIO if user did _not_ specify it explicitly... */ # define __USE_MINGW_ANSI_STDIO 1 #endif /* We are defining __USE_MINGW_ANSI_STDIO as 0 or 1 */ #if !defined(__USE_MINGW_ANSI_STDIO) #define __USE_MINGW_ANSI_STDIO 0 /* was not defined so it should be 0 */ #elif (__USE_MINGW_ANSI_STDIO + 0) != 0 || (1 - __USE_MINGW_ANSI_STDIO - 1) == 2 #define __USE_MINGW_ANSI_STDIO 1 /* was defined as nonzero or empty so it should be 1 */ #else #define __USE_MINGW_ANSI_STDIO 0 /* was defined as (int)zero and non-empty so it should be 0 */ #endif /* _dowildcard is an int that controls the globbing of the command line. * The MinGW32 (mingw.org) runtime calls it _CRT_glob, so we are adding * a compatibility definition here: you can use either of _CRT_glob or * _dowildcard . * If _dowildcard is non-zero, the command line will be globbed: *.* * will be expanded to be all files in the startup directory. * In the mingw-w64 library a _dowildcard variable is defined as being * 0, therefore command line globbing is DISABLED by default. 
To turn it * on and to leave wildcard command line processing MS's globbing code, * include a line in one of your source modules defining _dowildcard and * setting it to -1, like so: * int _dowildcard = -1; */ #undef _CRT_glob #define _CRT_glob _dowildcard #if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSSTRUCT) && \ !defined(NONAMELESSUNION) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSUNION) && \ !defined(NONAMELESSSTRUCT) #define NONAMELESSSTRUCT 1 #endif #ifndef __ANONYMOUS_DEFINED #define __ANONYMOUS_DEFINED #define _ANONYMOUS_UNION __MINGW_EXTENSION #define _ANONYMOUS_STRUCT __MINGW_EXTENSION #ifndef NONAMELESSUNION #define _UNION_NAME(x) #define _STRUCT_NAME(x) #else /* NONAMELESSUNION */ #define _UNION_NAME(x) x #define _STRUCT_NAME(x) x #endif #endif /* __ANONYMOUS_DEFINED */ #ifndef DUMMYUNIONNAME # ifdef NONAMELESSUNION # define DUMMYUNIONNAME u # define DUMMYUNIONNAME1 u1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 u2 # define DUMMYUNIONNAME3 u3 # define DUMMYUNIONNAME4 u4 # define DUMMYUNIONNAME5 u5 # define DUMMYUNIONNAME6 u6 # define DUMMYUNIONNAME7 u7 # define DUMMYUNIONNAME8 u8 # define DUMMYUNIONNAME9 u9 # else /* NONAMELESSUNION */ # define DUMMYUNIONNAME # define DUMMYUNIONNAME1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 # define DUMMYUNIONNAME3 # define DUMMYUNIONNAME4 # define DUMMYUNIONNAME5 # define DUMMYUNIONNAME6 # define DUMMYUNIONNAME7 # define DUMMYUNIONNAME8 # define DUMMYUNIONNAME9 # endif #endif /* DUMMYUNIONNAME */ #ifndef DUMMYSTRUCTNAME # ifdef NONAMELESSUNION # define DUMMYSTRUCTNAME s # define DUMMYSTRUCTNAME1 s1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 s2 # define DUMMYSTRUCTNAME3 s3 # define DUMMYSTRUCTNAME4 s4 # define DUMMYSTRUCTNAME5 s5 # else # define DUMMYSTRUCTNAME # define DUMMYSTRUCTNAME1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 # define DUMMYSTRUCTNAME3 # define DUMMYSTRUCTNAME4 # define DUMMYSTRUCTNAME5 
# endif #endif /* DUMMYSTRUCTNAME */ /* Macros for __uuidof template-based emulation */ #if defined(__cplusplus) && (USE___UUIDOF == 0) #if __cpp_constexpr >= 200704l && __cpp_inline_variables >= 201606L #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \ extern "C++" { \ template<> struct __mingw_uuidof_s { \ static constexpr IID __uuid_inst = { \ l,w1,w2, {b1,b2,b3,b4,b5,b6,b7,b8} \ }; \ }; \ template<> constexpr const GUID &__mingw_uuidof() { \ return __mingw_uuidof_s::__uuid_inst; \ } \ template<> constexpr const GUID &__mingw_uuidof() { \ return __mingw_uuidof_s::__uuid_inst; \ } \ } #else #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \ extern "C++" { \ template<> inline const GUID &__mingw_uuidof() { \ static const IID __uuid_inst = {l,w1,w2, {b1,b2,b3,b4,b5,b6,b7,b8}}; \ return __uuid_inst; \ } \ template<> inline const GUID &__mingw_uuidof() { \ return __mingw_uuidof(); \ } \ } #endif #define __uuidof(type) __mingw_uuidof<__typeof(type)>() #else #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) #endif #ifdef __cplusplus extern "C" { #endif #ifdef __MINGW_INTRIN_INLINE #ifdef __has_builtin #define __MINGW_DEBUGBREAK_IMPL !__has_builtin(__debugbreak) #else #define __MINGW_DEBUGBREAK_IMPL 1 #endif #if __MINGW_DEBUGBREAK_IMPL == 1 void __cdecl __debugbreak(void); __MINGW_INTRIN_INLINE void __cdecl __debugbreak(void) { #if defined(__i386__) || defined(__x86_64__) __asm__ __volatile__("int {$}3":); #elif defined(__arm__) __asm__ __volatile__("udf #0xfe"); #elif defined(__aarch64__) __asm__ __volatile__("brk #0xf000"); #else __asm__ __volatile__("unimplemented"); #endif } #endif #endif /* mingw-w64 specific functions: */ const char *__mingw_get_crt_info (void); #ifdef __cplusplus } #endif #endif /* _INC__MINGW_H */ #ifndef MINGW_SDK_INIT #define MINGW_SDK_INIT /* for backward compatibility */ #ifndef MINGW_HAS_SECURE_API #define MINGW_HAS_SECURE_API 1 #endif #define __STDC_SECURE_LIB__ 200411L #define __GOT_SECURE_LIB__ 
__STDC_SECURE_LIB__ #ifndef __WIDL__ #include "sdks/_mingw_ddk.h" #endif #endif /* MINGW_SDK_INIT */ affxparser/src/_mingw_gcc463.h0000644000175200017520000004311414516003651017232 0ustar00biocbuildbiocbuild/** * This file has no copyright assigned and is placed in the Public Domain. * This file is part of the w64 mingw-runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. */ #ifndef _INC_CRTDEFS #define _INC_CRTDEFS #include "_mingw_mac.h" /* C/C++ specific language defines. */ #ifdef _WIN64 #ifdef __stdcall #undef __stdcall #endif #define __stdcall #endif #ifndef __GNUC__ # ifndef __MINGW_IMPORT # define __MINGW_IMPORT __declspec(dllimport) # endif # ifndef _CRTIMP # define _CRTIMP __declspec(dllimport) # endif # define __DECLSPEC_SUPPORTED # define __attribute__(x) /* nothing */ #else /* __GNUC__ */ # ifdef __declspec # ifndef __MINGW_IMPORT /* Note the extern. This is needed to work around GCC's limitations in handling dllimport attribute. */ # define __MINGW_IMPORT extern __attribute__ ((__dllimport__)) # endif # ifndef _CRTIMP # undef __USE_CRTIMP # if !defined (_CRTBLD) && !defined (_SYSCRT) # define __USE_CRTIMP 1 # endif # ifdef __USE_CRTIMP # define _CRTIMP __attribute__ ((__dllimport__)) # else # define _CRTIMP # endif # endif # define __DECLSPEC_SUPPORTED # else /* __declspec */ # undef __DECLSPEC_SUPPORTED # undef __MINGW_IMPORT # ifndef _CRTIMP # define _CRTIMP # endif # endif /* __declspec */ #endif /* __GNUC__ */ #if defined (__GNUC__) && defined (__GNUC_MINOR__) #define __MINGW_GNUC_PREREQ(major, minor) \ (__GNUC__ > (major) \ || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) #else #define __MINGW_GNUC_PREREQ(major, minor) 0 #endif #if defined (_MSC_VER) #define __MINGW_MSC_PREREQ(major, minor) \ (_MSC_VER >= (major * 100 + minor * 10)) #else #define __MINGW_MSC_PREREQ(major, minor) 0 #endif #ifdef _MSC_VER #define USE___UUIDOF 0 #else #define USE___UUIDOF 0 #endif #ifdef __cplusplus # define 
__CRT_INLINE inline #elif defined(_MSC_VER) # define __CRT_INLINE __inline #else # if ( __MINGW_GNUC_PREREQ(4, 3) && __STDC_VERSION__ >= 199901L) \ || (defined (__clang__)) # define __CRT_INLINE extern inline __attribute__((__gnu_inline__)) # else # define __CRT_INLINE extern __inline__ # endif #endif #if !defined(__MINGW_INTRIN_INLINE) && defined(__GNUC__) #define __MINGW_INTRIN_INLINE extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif #ifdef __NO_INLINE__ #undef __CRT__NO_INLINE #define __CRT__NO_INLINE 1 #endif #ifdef __cplusplus # define __UNUSED_PARAM(x) #else # ifdef __GNUC__ # define __UNUSED_PARAM(x) x __attribute__ ((__unused__)) # else # define __UNUSED_PARAM(x) x # endif #endif #ifndef __GNUC__ # ifdef _MSC_VER # define __restrict__ __restrict # else # define __restrict__ /* nothing */ # endif #endif /* !__GNUC__ */ #if __MINGW_GNUC_PREREQ (3,1) && !defined __GNUG__ # define __restrict_arr __restrict #elif defined(_MSC_VER) # define __restrict_arr __restrict #else # ifdef __GNUC__ # define __restrict_arr /* Not supported in old GCC. */ # else # if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L # define __restrict_arr restrict # else # define __restrict_arr /* Not supported. */ # endif # endif #endif #ifdef __GNUC__ #define __MINGW_ATTRIB_NORETURN __attribute__ ((__noreturn__)) #define __MINGW_ATTRIB_CONST __attribute__ ((__const__)) #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_NORETURN __declspec(noreturn) #define __MINGW_ATTRIB_CONST #else #define __MINGW_ATTRIB_NORETURN #define __MINGW_ATTRIB_CONST #endif #if __MINGW_GNUC_PREREQ (3, 0) #define __MINGW_ATTRIB_MALLOC __attribute__ ((__malloc__)) #define __MINGW_ATTRIB_PURE __attribute__ ((__pure__)) #elif __MINGW_MSC_PREREQ(14, 0) #define __MINGW_ATTRIB_MALLOC __declspec(noalias) __declspec(restrict) #define __MINGW_ATTRIB_PURE #else #define __MINGW_ATTRIB_MALLOC #define __MINGW_ATTRIB_PURE #endif /* Attribute `nonnull' was valid as of gcc 3.3. 
We don't use GCC's variadiac macro facility, because variadic macros cause syntax errors with --traditional-cpp. */ #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_ATTRIB_NONNULL(arg) __attribute__ ((__nonnull__ (arg))) #else #define __MINGW_ATTRIB_NONNULL(arg) #endif /* GNUC >= 3.3 */ #ifdef __GNUC__ #define __MINGW_ATTRIB_UNUSED __attribute__ ((__unused__)) #else #define __MINGW_ATTRIB_UNUSED #endif /* ATTRIBUTE_UNUSED */ #if __MINGW_GNUC_PREREQ (3, 1) #define __MINGW_ATTRIB_USED __attribute__ ((__used__)) #define __MINGW_ATTRIB_DEPRECATED __attribute__ ((__deprecated__)) #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_USED #define __MINGW_ATTRIB_DEPRECATED __declspec(deprecated) #else #define __MINGW_ATTRIB_USED __MINGW_ATTRIB_UNUSED #define __MINGW_ATTRIB_DEPRECATED #endif /* GNUC >= 3.1 */ #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_NOTHROW __attribute__ ((__nothrow__)) #elif __MINGW_MSC_PREREQ(12, 0) && defined (__cplusplus) #define __MINGW_NOTHROW __declspec(nothrow) #else #define __MINGW_NOTHROW #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_ATTRIB_NO_OPTIMIZE __attribute__((__optimize__ ("0"))) #else #define __MINGW_ATTRIB_NO_OPTIMIZE #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_PRAGMA_PARAM(x) _Pragma (#x) #elif __MINGW_MSC_PREREQ (13, 1) #define __MINGW_PRAGMA_PARAM(x) __pragma (x) #else #define __MINGW_PRAGMA_PARAM(x) #endif #define __MINGW_BROKEN_INTERFACE(x) \ __MINGW_PRAGMA_PARAM(message ("Interface " _CRT_STRINGIZE(x) \ " has unverified layout.")) #ifdef __MINGW_MSVC_COMPAT_WARNINGS # if __MINGW_GNUC_PREREQ (4, 5) # define __MINGW_ATTRIB_DEPRECATED_STR(X) __attribute__ ((__deprecated__ (X))) # else # define __MINGW_ATTRIB_DEPRECATED_STR(X) __MINGW_ATTRIB_DEPRECATED # endif #else # define __MINGW_ATTRIB_DEPRECATED_STR(X) #endif #define __MINGW_SEC_WARN_STR "This function or variable may be unsafe, use _CRT_SECURE_NO_WARNINGS to disable deprecation" #define __MINGW_MSVC2005_DEPREC_STR "This POSIX function is deprecated 
beginning in Visual C++ 2005, use _CRT_NONSTDC_NO_DEPRECATE to disable deprecation" #if !defined (_CRT_NONSTDC_NO_DEPRECATE) # define __MINGW_ATTRIB_DEPRECATED_MSVC2005 __MINGW_ATTRIB_DEPRECATED_STR (__MINGW_MSVC2005_DEPREC_STR) #else # define __MINGW_ATTRIB_DEPRECATED_MSVC2005 #endif #if !defined (_CRT_SECURE_NO_WARNINGS) # define __MINGW_ATTRIB_DEPRECATED_SEC_WARN __MINGW_ATTRIB_DEPRECATED_STR (__MINGW_SEC_WARN_STR) #else # define __MINGW_ATTRIB_DEPRECATED_SEC_WARN #endif #ifndef __MSVCRT_VERSION__ /* High byte is the major version, low byte is the minor. */ # define __MSVCRT_VERSION__ 0x0700 #endif #ifndef WINVER #define WINVER 0x0502 #endif #ifndef _WIN32_WINNT #define _WIN32_WINNT 0x502 #endif #ifndef _INT128_DEFINED #define _INT128_DEFINED #ifdef __GNUC__ #define __int8 char #define __int16 short #define __int32 int #define __int64 long long #ifdef _WIN64 #ifndef __SIZEOF_INT128__ typedef int __int128 __attribute__ ((__mode__ (TI))); #endif #endif #endif /* __GNUC__ */ #endif /* _INT128_DEFINED */ #ifdef __GNUC__ #define __ptr32 #define __ptr64 #ifndef __unaligned #define __unaligned #endif #ifndef __w64 #define __w64 #endif #ifdef __cplusplus #define __forceinline inline __attribute__((__always_inline__)) #else #define __forceinline extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif /* __cplusplus */ #endif /* __GNUC__ */ #ifndef _WIN32 #error Only Win32 target is supported! 
#endif #ifndef __nothrow #ifdef __cplusplus #define __nothrow __declspec(nothrow) #else #define __nothrow #endif #endif /* __nothrow */ #undef _CRT_PACKING #define _CRT_PACKING 8 #include /* other headers depend on this include */ #pragma pack(push,_CRT_PACKING) #ifndef _CRT_STRINGIZE #define __CRT_STRINGIZE(_Value) #_Value #define _CRT_STRINGIZE(_Value) __CRT_STRINGIZE(_Value) #endif /* _CRT_STRINGIZE */ #ifndef _CRT_WIDE #define __CRT_WIDE(_String) L ## _String #define _CRT_WIDE(_String) __CRT_WIDE(_String) #endif /* _CRT_WIDE */ #ifndef _W64 #define _W64 #endif #ifndef _CRTIMP_NOIA64 #ifdef __ia64__ #define _CRTIMP_NOIA64 #else #define _CRTIMP_NOIA64 _CRTIMP #endif #endif /* _CRTIMP_NOIA64 */ #ifndef _CRTIMP2 #define _CRTIMP2 _CRTIMP #endif #ifndef _CRTIMP_ALTERNATIVE #define _CRTIMP_ALTERNATIVE _CRTIMP #define _CRT_ALTERNATIVE_IMPORTED #endif /* _CRTIMP_ALTERNATIVE */ #ifndef _MRTIMP2 #define _MRTIMP2 _CRTIMP #endif /* We have to define _DLL for gcc based mingw version. This define is set by VC, when DLL-based runtime is used. So, gcc based runtime just have DLL-base runtime, therefore this define has to be set. As our headers are possibly used by windows compiler having a static C-runtime, we make this definition gnu compiler specific here. 
*/ #if !defined (_DLL) && defined (__GNUC__) #define _DLL #endif #ifndef _MT #define _MT #endif #ifndef _MCRTIMP #define _MCRTIMP _CRTIMP #endif #ifndef _CRTIMP_PURE #define _CRTIMP_PURE _CRTIMP #endif #ifndef _PGLOBAL #define _PGLOBAL #endif #ifndef _AGLOBAL #define _AGLOBAL #endif #define _SECURECRT_FILL_BUFFER_PATTERN 0xFD #define _CRT_DEPRECATE_TEXT(_Text) __declspec(deprecated) #ifndef _CRT_INSECURE_DEPRECATE_MEMORY #define _CRT_INSECURE_DEPRECATE_MEMORY(_Replacement) #endif #ifndef _CRT_INSECURE_DEPRECATE_GLOBALS #define _CRT_INSECURE_DEPRECATE_GLOBALS(_Replacement) #endif #ifndef _CRT_MANAGED_HEAP_DEPRECATE #define _CRT_MANAGED_HEAP_DEPRECATE #endif #ifndef _CRT_OBSOLETE #define _CRT_OBSOLETE(_NewItem) #endif #ifndef _SIZE_T_DEFINED #define _SIZE_T_DEFINED #undef size_t #ifdef _WIN64 __MINGW_EXTENSION typedef unsigned __int64 size_t; #else typedef unsigned int size_t; #endif /* _WIN64 */ #endif /* _SIZE_T_DEFINED */ #ifndef _SSIZE_T_DEFINED #define _SSIZE_T_DEFINED #undef ssize_t #ifdef _WIN64 __MINGW_EXTENSION typedef __int64 ssize_t; #else typedef int ssize_t; #endif /* _WIN64 */ #endif /* _SSIZE_T_DEFINED */ #ifndef _INTPTR_T_DEFINED #define _INTPTR_T_DEFINED #ifndef __intptr_t_defined #define __intptr_t_defined #undef intptr_t #ifdef _WIN64 __MINGW_EXTENSION typedef __int64 intptr_t; #else typedef int intptr_t; #endif /* _WIN64 */ #endif /* __intptr_t_defined */ #endif /* _INTPTR_T_DEFINED */ #ifndef _UINTPTR_T_DEFINED #define _UINTPTR_T_DEFINED #ifndef __uintptr_t_defined #define __uintptr_t_defined #undef uintptr_t #ifdef _WIN64 __MINGW_EXTENSION typedef unsigned __int64 uintptr_t; #else typedef unsigned int uintptr_t; #endif /* _WIN64 */ #endif /* __uintptr_t_defined */ #endif /* _UINTPTR_T_DEFINED */ #ifndef _PTRDIFF_T_DEFINED #define _PTRDIFF_T_DEFINED #ifndef _PTRDIFF_T_ #define _PTRDIFF_T_ #undef ptrdiff_t #ifdef _WIN64 __MINGW_EXTENSION typedef __int64 ptrdiff_t; #else typedef int ptrdiff_t; #endif /* _WIN64 */ #endif /* _PTRDIFF_T_ */ #endif /* 
_PTRDIFF_T_DEFINED */ #ifndef _WCHAR_T_DEFINED #define _WCHAR_T_DEFINED #ifndef __cplusplus typedef unsigned short wchar_t; #endif /* C++ */ #endif /* _WCHAR_T_DEFINED */ #ifndef _WCTYPE_T_DEFINED #define _WCTYPE_T_DEFINED #ifndef _WINT_T #define _WINT_T typedef unsigned short wint_t; typedef unsigned short wctype_t; #endif /* _WINT_T */ #endif /* _WCTYPE_T_DEFINED */ #if defined (_WIN32) && !(defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64) && !defined (_WIN64) && !defined (__MINGW_USE_VC2005_COMPAT) #ifndef _USE_32BIT_TIME_T #define _USE_32BIT_TIME_T #endif #endif #ifdef _USE_32BIT_TIME_T #ifdef _WIN64 #error You cannot use 32-bit time_t (_USE_32BIT_TIME_T) with _WIN64 #undef _USE_32BIT_TIME_T #endif #endif /* _USE_32BIT_TIME_T */ #ifndef _ERRCODE_DEFINED #define _ERRCODE_DEFINED typedef int errno_t; #endif #ifndef _TIME32_T_DEFINED #define _TIME32_T_DEFINED typedef long __time32_t; #endif #ifndef _TIME64_T_DEFINED #define _TIME64_T_DEFINED __MINGW_EXTENSION typedef __int64 __time64_t; #endif /* _TIME64_T_DEFINED */ #ifndef _TIME_T_DEFINED #define _TIME_T_DEFINED #ifdef _USE_32BIT_TIME_T typedef __time32_t time_t; #else typedef __time64_t time_t; #endif #endif /* _TIME_T_DEFINED */ #ifndef _CONST_RETURN #define _CONST_RETURN #endif #ifndef UNALIGNED #if defined(_M_IA64) || defined(_M_AMD64) #define UNALIGNED __unaligned #else #define UNALIGNED #endif #endif /* UNALIGNED */ #ifndef _CRT_ALIGN #ifdef _MSC_VER #define _CRT_ALIGN(x) __declspec(align(x)) #else /* __GNUC__ */ #define _CRT_ALIGN(x) __attribute__ ((__aligned__ (x))) #endif #endif /* _CRT_ALIGN */ #ifndef __CRTDECL #define __CRTDECL __cdecl #endif #define _ARGMAX 100 #ifndef _TRUNCATE #define _TRUNCATE ((size_t)-1) #endif #ifndef _CRT_UNUSED #define _CRT_UNUSED(x) (void)x #endif #if defined(_POSIX) && !defined(__USE_MINGW_ANSI_STDIO) /* Enable __USE_MINGW_ANSI_STDIO if _POSIX defined * and If user did _not_ specify it explicitly... 
*/ # define __USE_MINGW_ANSI_STDIO 1 #endif /* _dowildcard is an int that controls the globbing of the command line. * The MinGW32 (mingw.org) runtime calls it _CRT_glob, so we are adding * a compatibility definition here: you can use either of _CRT_glob or * _dowildcard . * If _dowildcard is non-zero, the command line will be globbed: *.* * will be expanded to be all files in the startup directory. * In the mingw-w64 library a _dowildcard variable is defined as being * 0, therefore command line globbing is DISABLED by default. To turn it * on and to leave wildcard command line processing MS's globbing code, * include a line in one of your source modules defining _dowildcard and * setting it to -1, like so: * int _dowildcard = -1; */ #undef _CRT_glob #define _CRT_glob _dowildcard #if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSSTRUCT) && \ !defined(NONAMELESSUNION) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSUNION) && \ !defined(NONAMELESSSTRUCT) #define NONAMELESSSTRUCT 1 #endif #ifndef __ANONYMOUS_DEFINED #define __ANONYMOUS_DEFINED #define _ANONYMOUS_UNION __MINGW_EXTENSION #define _ANONYMOUS_STRUCT __MINGW_EXTENSION #ifndef NONAMELESSUNION #define _UNION_NAME(x) #define _STRUCT_NAME(x) #else /* NONAMELESSUNION */ #define _UNION_NAME(x) x #define _STRUCT_NAME(x) x #endif #endif /* __ANONYMOUS_DEFINED */ #ifndef DUMMYUNIONNAME # ifdef NONAMELESSUNION # define DUMMYUNIONNAME u # define DUMMYUNIONNAME1 u1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 u2 # define DUMMYUNIONNAME3 u3 # define DUMMYUNIONNAME4 u4 # define DUMMYUNIONNAME5 u5 # define DUMMYUNIONNAME6 u6 # define DUMMYUNIONNAME7 u7 # define DUMMYUNIONNAME8 u8 # define DUMMYUNIONNAME9 u9 # else /* NONAMELESSUNION */ # define DUMMYUNIONNAME # define DUMMYUNIONNAME1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 # define DUMMYUNIONNAME3 # define DUMMYUNIONNAME4 # define DUMMYUNIONNAME5 # define DUMMYUNIONNAME6 # define 
DUMMYUNIONNAME7 # define DUMMYUNIONNAME8 # define DUMMYUNIONNAME9 # endif #endif /* DUMMYUNIONNAME */ #ifndef DUMMYSTRUCTNAME # ifdef NONAMELESSUNION # define DUMMYSTRUCTNAME s # define DUMMYSTRUCTNAME1 s1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 s2 # define DUMMYSTRUCTNAME3 s3 # define DUMMYSTRUCTNAME4 s4 # define DUMMYSTRUCTNAME5 s5 # else # define DUMMYSTRUCTNAME # define DUMMYSTRUCTNAME1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 # define DUMMYSTRUCTNAME3 # define DUMMYSTRUCTNAME4 # define DUMMYSTRUCTNAME5 # endif #endif /* DUMMYSTRUCTNAME */ /* Macros for __uuidof template-based emulation */ #if defined(__cplusplus) && (USE___UUIDOF == 0) #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \ extern "C++" { \ template<> inline const GUID &__mingw_uuidof() { \ return (const IID){l,w1,w2, {b1,b2,b3,b4,b5,b6,b7,b8}}; \ } \ template<> inline const GUID &__mingw_uuidof() { \ return __mingw_uuidof(); \ } \ } #define __uuidof(type) __mingw_uuidof() #else #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) #endif /* MSVC-isms: */ struct threadlocaleinfostruct; struct threadmbcinfostruct; typedef struct threadlocaleinfostruct *pthreadlocinfo; typedef struct threadmbcinfostruct *pthreadmbcinfo; struct __lc_time_data; typedef struct localeinfo_struct { pthreadlocinfo locinfo; pthreadmbcinfo mbcinfo; } _locale_tstruct,*_locale_t; #ifndef _TAGLC_ID_DEFINED #define _TAGLC_ID_DEFINED typedef struct tagLC_ID { unsigned short wLanguage; unsigned short wCountry; unsigned short wCodePage; } LC_ID,*LPLC_ID; #endif /* _TAGLC_ID_DEFINED */ #ifndef _THREADLOCALEINFO #define _THREADLOCALEINFO typedef struct threadlocaleinfostruct { int refcount; unsigned int lc_codepage; unsigned int lc_collate_cp; unsigned long lc_handle[6]; LC_ID lc_id[6]; struct { char *locale; wchar_t *wlocale; int *refcount; int *wrefcount; } lc_category[6]; int lc_clike; int mb_cur_max; int *lconv_intl_refcount; int *lconv_num_refcount; int *lconv_mon_refcount; 
struct lconv *lconv; int *ctype1_refcount; unsigned short *ctype1; const unsigned short *pctype; const unsigned char *pclmap; const unsigned char *pcumap; struct __lc_time_data *lc_time_curr; } threadlocinfo; #endif /* _THREADLOCALEINFO */ #ifdef __cplusplus extern "C" { #endif #ifdef __MINGW_INTRIN_INLINE #if !defined (__clang__) void __cdecl __debugbreak(void); __MINGW_INTRIN_INLINE void __cdecl __debugbreak(void) { __asm__ __volatile__("int $3"); } #endif #endif /* mingw-w64 specific functions: */ const char *__mingw_get_crt_info (void); #ifdef __cplusplus } #endif #pragma pack(pop) #endif /* _INC_CRTDEFS */ #ifndef MINGW_SDK_INIT #define MINGW_SDK_INIT #define MINGW_HAS_SECURE_API 1 #ifdef MINGW_HAS_SECURE_API #define __STDC_SECURE_LIB__ 200411L #define __GOT_SECURE_LIB__ __STDC_SECURE_LIB__ #endif #include "sdks/_mingw_directx.h" #include "sdks/_mingw_ddk.h" #endif /* MINGW_SDK_INIT */ affxparser/src/_mingw_gcc493.h0000644000175200017520000004151714516003651017242 0ustar00biocbuildbiocbuild/** * This file has no copyright assigned and is placed in the Public Domain. * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. */ #ifndef _INC__MINGW_H #define _INC__MINGW_H #define MINGW_HAS_SECURE_API 1 #include "_mingw_mac.h" #include "_mingw_secapi.h" /* Include _cygwin.h if we're building a Cygwin application. */ #ifdef __CYGWIN__ #include "_cygwin.h" #endif /* Target specific macro replacement for type "long". In the Windows API, the type long is always 32 bit, even if the target is 64 bit (LLP64). On 64 bit Cygwin, the type long is 64 bit (LP64). So, to get the right sized definitions and declarations, all usage of type long in the Windows headers have to be replaced by the below defined macro __LONG32. */ #ifndef __LP64__ /* 32 bit target, 64 bit Mingw target */ #define __LONG32 long #else /* 64 bit Cygwin target */ #define __LONG32 int #endif /* C/C++ specific language defines. 
*/ #ifdef _WIN64 #ifdef __stdcall #undef __stdcall #endif #define __stdcall #endif #ifndef __GNUC__ # ifndef __MINGW_IMPORT # define __MINGW_IMPORT __declspec(dllimport) # endif # ifndef _CRTIMP # define _CRTIMP __declspec(dllimport) # endif # define __DECLSPEC_SUPPORTED # define __attribute__(x) /* nothing */ #else /* __GNUC__ */ # ifdef __declspec # ifndef __MINGW_IMPORT /* Note the extern. This is needed to work around GCC's limitations in handling dllimport attribute. */ # define __MINGW_IMPORT extern __attribute__ ((__dllimport__)) # endif # ifndef _CRTIMP # undef __USE_CRTIMP # if !defined (_CRTBLD) && !defined (_SYSCRT) # define __USE_CRTIMP 1 # endif # ifdef __USE_CRTIMP # define _CRTIMP __attribute__ ((__dllimport__)) # else # define _CRTIMP # endif # endif # define __DECLSPEC_SUPPORTED # else /* __declspec */ # undef __DECLSPEC_SUPPORTED # undef __MINGW_IMPORT # ifndef _CRTIMP # define _CRTIMP # endif # endif /* __declspec */ #endif /* __GNUC__ */ #ifdef _MSC_VER #define USE___UUIDOF 0 #else #define USE___UUIDOF 0 #endif #if !defined(_MSC_VER) && !defined(_inline) #define _inline __inline #endif #ifdef __cplusplus # define __CRT_INLINE inline #elif defined(_MSC_VER) # define __CRT_INLINE __inline #else # if ( __MINGW_GNUC_PREREQ(4, 3) && __STDC_VERSION__ >= 199901L) \ || (defined (__clang__)) # define __CRT_INLINE extern inline __attribute__((__gnu_inline__)) # else # define __CRT_INLINE extern __inline__ # endif #endif #if !defined(__MINGW_INTRIN_INLINE) && defined(__GNUC__) #define __MINGW_INTRIN_INLINE extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif #ifndef __CYGWIN__ #ifdef __NO_INLINE__ #undef __CRT__NO_INLINE #define __CRT__NO_INLINE 1 #endif #endif #ifdef __cplusplus # define __UNUSED_PARAM(x) #else # ifdef __GNUC__ # define __UNUSED_PARAM(x) x __attribute__ ((__unused__)) # else # define __UNUSED_PARAM(x) x # endif #endif #ifndef __GNUC__ # ifdef _MSC_VER # define __restrict__ __restrict # else # define __restrict__ /* 
nothing */ # endif #endif /* !__GNUC__ */ #if __MINGW_GNUC_PREREQ (3,1) && !defined __GNUG__ # define __restrict_arr __restrict #elif defined(_MSC_VER) # define __restrict_arr __restrict #else # ifdef __GNUC__ # define __restrict_arr /* Not supported in old GCC. */ # else # if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L # define __restrict_arr restrict # else # define __restrict_arr /* Not supported. */ # endif # endif #endif #ifdef __GNUC__ #define __MINGW_ATTRIB_NORETURN __attribute__ ((__noreturn__)) #define __MINGW_ATTRIB_CONST __attribute__ ((__const__)) #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_NORETURN __declspec(noreturn) #define __MINGW_ATTRIB_CONST #else #define __MINGW_ATTRIB_NORETURN #define __MINGW_ATTRIB_CONST #endif #if __MINGW_GNUC_PREREQ (3, 0) #define __MINGW_ATTRIB_MALLOC __attribute__ ((__malloc__)) #define __MINGW_ATTRIB_PURE __attribute__ ((__pure__)) #elif __MINGW_MSC_PREREQ(14, 0) #define __MINGW_ATTRIB_MALLOC __declspec(noalias) __declspec(restrict) #define __MINGW_ATTRIB_PURE #else #define __MINGW_ATTRIB_MALLOC #define __MINGW_ATTRIB_PURE #endif /* Attribute `nonnull' was valid as of gcc 3.3. We don't use GCC's variadiac macro facility, because variadic macros cause syntax errors with --traditional-cpp. 
*/ #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_ATTRIB_NONNULL(arg) __attribute__ ((__nonnull__ (arg))) #else #define __MINGW_ATTRIB_NONNULL(arg) #endif /* GNUC >= 3.3 */ #ifdef __GNUC__ #define __MINGW_ATTRIB_UNUSED __attribute__ ((__unused__)) #else #define __MINGW_ATTRIB_UNUSED #endif /* ATTRIBUTE_UNUSED */ #if __MINGW_GNUC_PREREQ (3, 1) #define __MINGW_ATTRIB_USED __attribute__ ((__used__)) #define __MINGW_ATTRIB_DEPRECATED __attribute__ ((__deprecated__)) #elif __MINGW_MSC_PREREQ(12, 0) #define __MINGW_ATTRIB_USED #define __MINGW_ATTRIB_DEPRECATED __declspec(deprecated) #else #define __MINGW_ATTRIB_USED __MINGW_ATTRIB_UNUSED #define __MINGW_ATTRIB_DEPRECATED #endif /* GNUC >= 3.1 */ #if __MINGW_GNUC_PREREQ (3, 3) #define __MINGW_NOTHROW __attribute__ ((__nothrow__)) #elif __MINGW_MSC_PREREQ(12, 0) && defined (__cplusplus) #define __MINGW_NOTHROW __declspec(nothrow) #else #define __MINGW_NOTHROW #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_ATTRIB_NO_OPTIMIZE __attribute__((__optimize__ ("0"))) #else #define __MINGW_ATTRIB_NO_OPTIMIZE #endif #if __MINGW_GNUC_PREREQ (4, 4) #define __MINGW_PRAGMA_PARAM(x) _Pragma (#x) #elif __MINGW_MSC_PREREQ (13, 1) #define __MINGW_PRAGMA_PARAM(x) __pragma (x) #else #define __MINGW_PRAGMA_PARAM(x) #endif #define __MINGW_BROKEN_INTERFACE(x) \ __MINGW_PRAGMA_PARAM(message ("Interface " _CRT_STRINGIZE(x) \ " has unverified layout.")) #ifndef __MSVCRT_VERSION__ /* High byte is the major version, low byte is the minor. 
*/ # define __MSVCRT_VERSION__ 0x0700 #endif #ifndef WINVER #define WINVER 0x0502 #endif #ifndef _WIN32_WINNT #define _WIN32_WINNT 0x502 #endif #ifndef _INT128_DEFINED #define _INT128_DEFINED #ifdef __GNUC__ #define __int8 char #define __int16 short #define __int32 int #define __int64 long long #ifdef _WIN64 #if (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 1)) && \ !defined(__SIZEOF_INT128__) /* clang >= 3.1 has __int128 but no size macro */ #define __SIZEOF_INT128__ 16 #endif #ifndef __SIZEOF_INT128__ typedef int __int128 __attribute__ ((__mode__ (TI))); #endif #endif #endif /* __GNUC__ */ #endif /* _INT128_DEFINED */ #ifdef __GNUC__ #define __ptr32 #define __ptr64 #ifndef __unaligned #define __unaligned #endif #ifndef __w64 #define __w64 #endif #ifdef __cplusplus #define __forceinline inline __attribute__((__always_inline__)) #else #define __forceinline extern __inline__ __attribute__((__always_inline__,__gnu_inline__)) #endif /* __cplusplus */ #endif /* __GNUC__ */ #if !defined(_WIN32) && !defined(__CYGWIN__) #error Only Win32 target is supported! 
#endif #ifndef __nothrow #ifdef __cplusplus #define __nothrow __declspec(nothrow) #else #define __nothrow #endif #endif /* __nothrow */ #undef _CRT_PACKING #define _CRT_PACKING 8 #include /* other headers depend on this include */ #ifndef __WIDL__ #pragma pack(push,_CRT_PACKING) #endif #ifndef _CRT_STRINGIZE #define __CRT_STRINGIZE(_Value) #_Value #define _CRT_STRINGIZE(_Value) __CRT_STRINGIZE(_Value) #endif /* _CRT_STRINGIZE */ #ifndef _CRT_WIDE #define __CRT_WIDE(_String) L ## _String #define _CRT_WIDE(_String) __CRT_WIDE(_String) #endif /* _CRT_WIDE */ #ifndef _W64 #define _W64 #endif #ifndef _CRTIMP_NOIA64 #ifdef __ia64__ #define _CRTIMP_NOIA64 #else #define _CRTIMP_NOIA64 _CRTIMP #endif #endif /* _CRTIMP_NOIA64 */ #ifndef _CRTIMP2 #define _CRTIMP2 _CRTIMP #endif #ifndef _CRTIMP_ALTERNATIVE #define _CRTIMP_ALTERNATIVE _CRTIMP #define _CRT_ALTERNATIVE_IMPORTED #endif /* _CRTIMP_ALTERNATIVE */ #ifndef _MRTIMP2 #define _MRTIMP2 _CRTIMP #endif /* We have to define _DLL for gcc based mingw version. This define is set by VC, when DLL-based runtime is used. So, gcc based runtime just have DLL-base runtime, therefore this define has to be set. As our headers are possibly used by windows compiler having a static C-runtime, we make this definition gnu compiler specific here. 
*/ #if !defined (_DLL) && defined (__GNUC__) #define _DLL #endif #ifndef _MT #define _MT #endif #ifndef _MCRTIMP #define _MCRTIMP _CRTIMP #endif #ifndef _CRTIMP_PURE #define _CRTIMP_PURE _CRTIMP #endif #ifndef _PGLOBAL #define _PGLOBAL #endif #ifndef _AGLOBAL #define _AGLOBAL #endif #define _SECURECRT_FILL_BUFFER_PATTERN 0xFD #define _CRT_DEPRECATE_TEXT(_Text) __declspec(deprecated) #ifndef _CRT_INSECURE_DEPRECATE_MEMORY #define _CRT_INSECURE_DEPRECATE_MEMORY(_Replacement) #endif #ifndef _CRT_INSECURE_DEPRECATE_GLOBALS #define _CRT_INSECURE_DEPRECATE_GLOBALS(_Replacement) #endif #ifndef _CRT_MANAGED_HEAP_DEPRECATE #define _CRT_MANAGED_HEAP_DEPRECATE #endif #ifndef _CRT_OBSOLETE #define _CRT_OBSOLETE(_NewItem) #endif #ifndef __WIDL__ #ifndef _SIZE_T_DEFINED #define _SIZE_T_DEFINED #undef size_t #ifdef _WIN64 __MINGW_EXTENSION typedef unsigned __int64 size_t; #else typedef unsigned int size_t; #endif /* _WIN64 */ #endif /* _SIZE_T_DEFINED */ #ifndef _SSIZE_T_DEFINED #define _SSIZE_T_DEFINED #undef ssize_t #ifdef _WIN64 __MINGW_EXTENSION typedef __int64 ssize_t; #else typedef int ssize_t; #endif /* _WIN64 */ #endif /* _SSIZE_T_DEFINED */ #ifndef _INTPTR_T_DEFINED #define _INTPTR_T_DEFINED #ifndef __intptr_t_defined #define __intptr_t_defined #undef intptr_t #ifdef _WIN64 __MINGW_EXTENSION typedef __int64 intptr_t; #else typedef int intptr_t; #endif /* _WIN64 */ #endif /* __intptr_t_defined */ #endif /* _INTPTR_T_DEFINED */ #ifndef _UINTPTR_T_DEFINED #define _UINTPTR_T_DEFINED #ifndef __uintptr_t_defined #define __uintptr_t_defined #undef uintptr_t #ifdef _WIN64 __MINGW_EXTENSION typedef unsigned __int64 uintptr_t; #else typedef unsigned int uintptr_t; #endif /* _WIN64 */ #endif /* __uintptr_t_defined */ #endif /* _UINTPTR_T_DEFINED */ #ifndef _PTRDIFF_T_DEFINED #define _PTRDIFF_T_DEFINED #ifndef _PTRDIFF_T_ #define _PTRDIFF_T_ #undef ptrdiff_t #ifdef _WIN64 __MINGW_EXTENSION typedef __int64 ptrdiff_t; #else typedef int ptrdiff_t; #endif /* _WIN64 */ #endif /* 
_PTRDIFF_T_ */ #endif /* _PTRDIFF_T_DEFINED */ #ifndef _WCHAR_T_DEFINED #define _WCHAR_T_DEFINED #if !defined(__cplusplus) && !defined(__WIDL__) typedef unsigned short wchar_t; #endif /* C++ */ #endif /* _WCHAR_T_DEFINED */ #ifndef _WCTYPE_T_DEFINED #define _WCTYPE_T_DEFINED #ifndef _WINT_T #define _WINT_T typedef unsigned short wint_t; typedef unsigned short wctype_t; #endif /* _WINT_T */ #endif /* _WCTYPE_T_DEFINED */ #if defined (_WIN32) && !defined (_WIN64) && !defined (__MINGW_USE_VC2005_COMPAT) #ifndef _USE_32BIT_TIME_T #define _USE_32BIT_TIME_T #endif #endif #ifdef _USE_32BIT_TIME_T #ifdef _WIN64 #error You cannot use 32-bit time_t (_USE_32BIT_TIME_T) with _WIN64 #undef _USE_32BIT_TIME_T #endif #endif /* _USE_32BIT_TIME_T */ #ifndef _ERRCODE_DEFINED #define _ERRCODE_DEFINED typedef int errno_t; #endif #ifndef _TIME32_T_DEFINED #define _TIME32_T_DEFINED typedef long __time32_t; #endif #ifndef _TIME64_T_DEFINED #define _TIME64_T_DEFINED __MINGW_EXTENSION typedef __int64 __time64_t; #endif /* _TIME64_T_DEFINED */ #ifndef _TIME_T_DEFINED #define _TIME_T_DEFINED #ifdef _USE_32BIT_TIME_T typedef __time32_t time_t; #else typedef __time64_t time_t; #endif #endif /* _TIME_T_DEFINED */ #ifndef _CONST_RETURN #define _CONST_RETURN #endif #ifndef UNALIGNED #if defined(_M_IA64) || defined(_M_AMD64) #define UNALIGNED __unaligned #else #define UNALIGNED #endif #endif /* UNALIGNED */ #ifndef _CRT_ALIGN #ifdef _MSC_VER #define _CRT_ALIGN(x) __declspec(align(x)) #else /* __GNUC__ */ #define _CRT_ALIGN(x) __attribute__ ((__aligned__ (x))) #endif #endif /* _CRT_ALIGN */ #endif /* __WIDL__ */ #ifndef __CRTDECL #define __CRTDECL __cdecl #endif #define _ARGMAX 100 #ifndef _TRUNCATE #define _TRUNCATE ((size_t)-1) #endif #ifndef _CRT_UNUSED #define _CRT_UNUSED(x) (void)x #endif /* MSVC defines _NATIVE_NULLPTR_SUPPORTED when nullptr is supported. We emulate it here for GCC. 
*/ #if __MINGW_GNUC_PREREQ(4, 6) #if defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) #define _NATIVE_NULLPTR_SUPPORTED #endif #endif /* We are activating __USE_MINGW_ANSI_STDIO for various define indicators. Note that we enable it also for _GNU_SOURCE in C++, but not for C case. */ #if (defined (_POSIX) || defined (_POSIX_SOURCE) || defined (_POSIX_C_SOURCE) \ || defined (_ISOC99_SOURCE) \ || defined (_XOPEN_SOURCE) || defined (_XOPEN_SOURCE_EXTENDED) \ || (defined (_GNU_SOURCE) && defined (__cplusplus)) \ || defined (_SVID_SOURCE)) \ && !defined(__USE_MINGW_ANSI_STDIO) /* Enable __USE_MINGW_ANSI_STDIO if _POSIX defined * and If user did _not_ specify it explicitly... */ # define __USE_MINGW_ANSI_STDIO 1 #endif /* _dowildcard is an int that controls the globbing of the command line. * The MinGW32 (mingw.org) runtime calls it _CRT_glob, so we are adding * a compatibility definition here: you can use either of _CRT_glob or * _dowildcard . * If _dowildcard is non-zero, the command line will be globbed: *.* * will be expanded to be all files in the startup directory. * In the mingw-w64 library a _dowildcard variable is defined as being * 0, therefore command line globbing is DISABLED by default. 
To turn it * on and to leave wildcard command line processing MS's globbing code, * include a line in one of your source modules defining _dowildcard and * setting it to -1, like so: * int _dowildcard = -1; */ #undef _CRT_glob #define _CRT_glob _dowildcard #if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSSTRUCT) && \ !defined(NONAMELESSUNION) #define NONAMELESSUNION 1 #endif #if defined(NONAMELESSUNION) && \ !defined(NONAMELESSSTRUCT) #define NONAMELESSSTRUCT 1 #endif #ifndef __ANONYMOUS_DEFINED #define __ANONYMOUS_DEFINED #define _ANONYMOUS_UNION __MINGW_EXTENSION #define _ANONYMOUS_STRUCT __MINGW_EXTENSION #ifndef NONAMELESSUNION #define _UNION_NAME(x) #define _STRUCT_NAME(x) #else /* NONAMELESSUNION */ #define _UNION_NAME(x) x #define _STRUCT_NAME(x) x #endif #endif /* __ANONYMOUS_DEFINED */ #ifndef DUMMYUNIONNAME # ifdef NONAMELESSUNION # define DUMMYUNIONNAME u # define DUMMYUNIONNAME1 u1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 u2 # define DUMMYUNIONNAME3 u3 # define DUMMYUNIONNAME4 u4 # define DUMMYUNIONNAME5 u5 # define DUMMYUNIONNAME6 u6 # define DUMMYUNIONNAME7 u7 # define DUMMYUNIONNAME8 u8 # define DUMMYUNIONNAME9 u9 # else /* NONAMELESSUNION */ # define DUMMYUNIONNAME # define DUMMYUNIONNAME1 /* Wine uses this variant */ # define DUMMYUNIONNAME2 # define DUMMYUNIONNAME3 # define DUMMYUNIONNAME4 # define DUMMYUNIONNAME5 # define DUMMYUNIONNAME6 # define DUMMYUNIONNAME7 # define DUMMYUNIONNAME8 # define DUMMYUNIONNAME9 # endif #endif /* DUMMYUNIONNAME */ #ifndef DUMMYSTRUCTNAME # ifdef NONAMELESSUNION # define DUMMYSTRUCTNAME s # define DUMMYSTRUCTNAME1 s1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 s2 # define DUMMYSTRUCTNAME3 s3 # define DUMMYSTRUCTNAME4 s4 # define DUMMYSTRUCTNAME5 s5 # else # define DUMMYSTRUCTNAME # define DUMMYSTRUCTNAME1 /* Wine uses this variant */ # define DUMMYSTRUCTNAME2 # define DUMMYSTRUCTNAME3 # define DUMMYSTRUCTNAME4 # define DUMMYSTRUCTNAME5 
# endif #endif /* DUMMYSTRUCTNAME */ /* Macros for __uuidof template-based emulation */ #if defined(__cplusplus) && (USE___UUIDOF == 0) #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) \ extern "C++" { \ template<> inline const GUID &__mingw_uuidof() { \ static const IID __uuid_inst = {l,w1,w2, {b1,b2,b3,b4,b5,b6,b7,b8}}; \ return __uuid_inst; \ } \ template<> inline const GUID &__mingw_uuidof() { \ return __mingw_uuidof(); \ } \ } #define __uuidof(type) __mingw_uuidof<__typeof(type)>() #else #define __CRT_UUID_DECL(type,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) #endif #ifdef __cplusplus extern "C" { #endif #ifdef __MINGW_INTRIN_INLINE #if !defined (__clang__) void __cdecl __debugbreak(void); __MINGW_INTRIN_INLINE void __cdecl __debugbreak(void) { __asm__ __volatile__("int {$}3":); } #endif #endif /* mingw-w64 specific functions: */ const char *__mingw_get_crt_info (void); #ifdef __cplusplus } #endif #ifndef __WIDL__ #pragma pack(pop) #endif #endif /* _INC__MINGW_H */ #ifndef MINGW_SDK_INIT #define MINGW_SDK_INIT #ifdef MINGW_HAS_SECURE_API #define __STDC_SECURE_LIB__ 200411L #define __GOT_SECURE_LIB__ __STDC_SECURE_LIB__ #endif #ifndef __WIDL__ #include "sdks/_mingw_directx.h" #include "sdks/_mingw_ddk.h" #endif #endif /* MINGW_SDK_INIT */ affxparser/src/_mingw_preR42.h0000644000175200017520000000137114516003651017316 0ustar00biocbuildbiocbuild/***************************************************************** This header file is used when compiling with MinGW on Windows. 
It is agile to the gcc toolchain currently used; it uses different setups for: - gcc (>= 4.9.3): introduced in R (>= 3.3.0) - gcc (>= 4.6.3): R (< 3.3.0) and some R (>= 3.3.0) installs Henrik Bengtsson, 2016-04-05 *****************************************************************/ #define GCC_VERSION (__GNUC__ * 10000 \ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) #if GCC_VERSION >= 40903 #include "_mingw_gcc493.h" #elif GCC_VERSION >= 40603 #include "_mingw_gcc463.h" #else /* Fall back assuming gcc 4.6.3 toolchain */ #include "_mingw_gcc463.h" #endif affxparser/src/fusion/0000755000175200017520000000000014516003651016030 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/0000755000175200017520000000000014516003651020466 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/array/0000755000175200017520000000000014516003651021604 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/array/src/0000755000175200017520000000000014516003651022373 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/array/src/ArrayAttributes.cpp0000644000175200017520000000276114516003651026232 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/array/src/ArrayAttributes.h" // using namespace affymetrix_calvin_array; /* * Initialize the class. */ ArrayAttributes::ArrayAttributes() { Clear(); } /* * Clean up. */ ArrayAttributes::~ArrayAttributes() { Clear(); } /* * Clears the member variables. */ void ArrayAttributes::Clear() { attributes.clear(); id.clear(); arrayName.clear(); arrayBarcode.clear(); media=CartridgeMedia; mediaRow=0; mediaCol=0; mediaFileName.clear(); mediaFileGUID.clear(); libraryPackageName.clear(); masterFile.clear(); masterFileId.clear(); patAssignment = NoAssignment; creationDateTime.clear(); createdBy.clear(); comment.clear(); } affxparser/src/fusion/calvin_files/array/src/ArrayAttributes.h0000644000175200017520000001241314516003651025672 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ArrayAttributes_HEADER_ #define _ArrayAttributes_HEADER_ /*! 
\file ArrayAttributes.h This file provides interfaces to store attributes about a physical array.
 */

#include "calvin_files/array/src/ArrayMedia.h"
#include "calvin_files/array/src/CreateStep.h"
#include "calvin_files/array/src/PATAssignment.h"
#include "calvin_files/parameter/src/Parameter.h"
#include "calvin_files/utils/src/AffymetrixGuid.h"
//
#include
#include
#include
//

namespace affymetrix_calvin_array
{

/*! This class provides interfaces to store physical array attributes.
 *  Every accessor returns a non-const reference to the underlying member,
 *  so the same call is used both to read and to assign a value.
 */
class ArrayAttributes
{
public:
	/*! Constructor */
	ArrayAttributes();

	/*! Destructor */
	~ArrayAttributes();

protected:
	/*! A unique identifier for the array object */
	affymetrix_calvin_utilities::AffymetrixGuidType id;

	/*! The array attributes */
	affymetrix_calvin_parameter::ParameterNameValuePairVector attributes;

	/*! The array name. */
	std::string arrayName;

	/*! The barcode on the array cartridge. */
	std::string arrayBarcode;

	/*! The type of assembly. */
	ArrayMedia media;

	/*! The row number of the plate or strip. */
	int mediaRow;

	/*! The column number of the plate or strip. */
	int mediaCol;

	/*! The name of the media file. */
	std::string mediaFileName;

	/*! The media file guid. */
	affymetrix_calvin_utilities::AffymetrixGuidType mediaFileGUID;

	/*! The name of the library file package. */
	std::string libraryPackageName;

	/*! The associated master file. */
	std::string masterFile;

	/*! A unique identifier for the master file */
	affymetrix_calvin_utilities::AffymetrixGuidType masterFileId;

	/*! The method the probe array type was assigned. */
	PATAssignmentMethod patAssignment;

	/*! The date the array object was created. */
	std::wstring creationDateTime;

	/*! The user who created the data object. */
	std::wstring createdBy;

	/*! A user comment. */
	std::wstring comment;

	/*! The step in Calvin that created the array set data. */
	CreateStep createdStep;

public:
	/*! The unique identifier for the object.
	 * @return The unique identifier for the object.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidType &Identifier() { return id; }

	/*! The array name.
	 * @return The array name.
	 */
	std::string &ArrayName() { return arrayName; }

	/*! The barcode on the array cartridge.
	 * @return The barcode.
	 */
	std::string &ArrayBarcode() { return arrayBarcode; }

	/*! The type of assembly.
	 * @return The assembly type.
	 */
	ArrayMedia &Media() { return media; }

	/*! The row number of the media or strip.
	 * @return The row.
	 */
	int &MediaRow() { return mediaRow; }

	/*! The column number of the media or strip.
	 * @return The column.
	 */
	int &MediaCol() { return mediaCol; }

	/*! The name of the media file.
	 * @return The media file name.
	 */
	std::string &MediaFileName() { return mediaFileName; }

	/*! The media file guid.
	 * @return The media file guid.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidType &MediaFileGUID() { return mediaFileGUID; }

	/*! The name of the library file package.
	 * @return The package name
	 */
	std::string &LibraryPackageName() { return libraryPackageName; }

	/*! The associated master file.
	 * @return The master file name.
	 */
	std::string &MasterFile() { return masterFile; }

	/*! The unique identifier for the master file.
	 * @return The master file guid.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidType &MasterFileId() { return masterFileId; }

	/*! The method the probe array type was assigned.
	 * @return The assignment method.
	 */
	PATAssignmentMethod &PatAssignment() { return patAssignment; }

	/*! The date and time of initial creation.
	 * @return The creation date and time.
	 */
	std::wstring &CreationDateTime() { return creationDateTime; }

	/*! The user who created the data object.
	 * @return The user who created the data object.
	 */
	std::wstring &CreatedBy() { return createdBy; }

	/*! A user comment.
	 * @return A user comment.
	 */
	std::wstring &Comment() { return comment; }

	/*! The step in Calvin that created the array set data.
	 * @return The step in Calvin that created the array set data.
	 */
	CreateStep &CreatedStep() { return createdStep; }

	/*! The array attributes.
	 * @return The vector of array attributes.
	 */
	affymetrix_calvin_parameter::ParameterNameValuePairVector &Attributes() { return attributes; }

	/*! Clears the member objects. */
	void Clear();
};

/*! An STL vector of array attributes. */
typedef std::vector ArrayAttributesVector;

};

#endif // _ArrayAttributes_HEADER_
affxparser/src/fusion/calvin_files/array/src/ArrayAudit.cpp0000644000175200017520000000244114516003651025145 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#include "calvin_files/array/src/ArrayAudit.h"
//

using namespace affymetrix_calvin_array;

/*
 * Initialize the class.
 * Nothing is assigned here; the members are left as their own default
 * construction produces them.
 */
ArrayAuditEntry::ArrayAuditEntry()
{
}

/*
 * Clean up.
 */
ArrayAuditEntry::~ArrayAuditEntry()
{
	Clear();
}

/*!
 * Clears each member: the user name, action type and array guid become
 * empty strings, the guid and parameter lists are emptied and the action
 * date/time is cleared through its own Clear().
 */
void ArrayAuditEntry::Clear()
{
	userName = L"";
	actionType = "";
	arrayGuid = "";
	inputFileGuids.clear();
	outputFileGuids.clear();
	actionParameters.clear();
	actionDateTime.Clear();
}
affxparser/src/fusion/calvin_files/array/src/ArrayAudit.h0000644000175200017520000000765514516003651024626 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _ArrayAudit_HEADER_
#define _ArrayAudit_HEADER_

/*! \file ArrayAudit.h This file provides interfaces for an audit item in an array file.
 */

#include "calvin_files/parameter/src/Parameter.h"
#include "calvin_files/portability/src/AffymetrixBaseTypes.h"
#include "calvin_files/utils/src/AffymetrixGuid.h"
#include "calvin_files/utils/src/DateTime.h"
//
#include
#include
#include
//

namespace affymetrix_calvin_array
{

/*! A class to store an audit entry associated with an array.
 *  Every accessor returns a non-const reference to the underlying member,
 *  so the same call is used both to read and to assign a value.
 */
class ArrayAuditEntry
{
public:
	/*! Constructor */
	ArrayAuditEntry();

	/*! Destructor */
	~ArrayAuditEntry();

protected:
	/*! The user name creating the audit entry. */
	std::wstring userName;

	/*! The type of action being performed. */
	affymetrix_calvin_utilities::AffymetrixGuidType actionType;

	/*! The guid to the physical array associated with the action. */
	affymetrix_calvin_utilities::AffymetrixGuidType arrayGuid;

	/*! A list of guids for the input file ids to the action. */
	affymetrix_calvin_utilities::AffymetrixGuidTypeList inputFileGuids;

	/*! A list of guids for the output file ids from the action. */
	affymetrix_calvin_utilities::AffymetrixGuidTypeList outputFileGuids;

	/*! A list of parameters associated with the action. */
	affymetrix_calvin_parameter::ParameterNameValuePairList actionParameters;

	/*! The date and time of the action. */
	affymetrix_calvin_utilities::DateTime actionDateTime;

public:
	/*! The user name associated with the audit entry.
	 *
	 * @return The user name associated with the audit entry.
	 */
	std::wstring &UserName() { return userName; }

	/*! The date and time of the action.
	 *
	 * @return The date and time of the action.
	 */
	affymetrix_calvin_utilities::DateTime &DateTime() { return actionDateTime; }

	/*! The type of action.
	 *
	 * @return The type of action.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidType &ActionType() { return actionType; }

	/*! The id of the physical array associated with the action.
	 *
	 * @return The physical array id.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidType &ArrayGuid() { return arrayGuid; }

	/*! The list of guids for the input files to the action.
	 *
	 * @return The list of guids for the input files to the action.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidTypeList &InputFileGuids() { return inputFileGuids; }

	/*! The list of guids for the output files of the action.
	 *
	 * @return The list of guids for the output files of the action.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidTypeList &OutputFileGuids() { return outputFileGuids; }

	/*! The list of parameters associated with the action.
	 *
	 * @return The list of parameters associated with the action.
	 */
	affymetrix_calvin_parameter::ParameterNameValuePairList &ActionParameters() { return actionParameters; }

	/*! Clears the member objects. */
	void Clear();
};

/*! An STL vector of audit entries */
typedef std::vector ArrayAuditEntryVector;

/*! An STL list of audit entries */
typedef std::list ArrayAuditEntryList;

};

#endif // _ArrayAudit_HEADER_
affxparser/src/fusion/calvin_files/array/src/ArrayAuditActionTypes.h0000644000175200017520000000304414516003651026775 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ArrayAuditActionTypes_HEADER_ #define _ArrayAuditActionTypes_HEADER_ /*! \file ArrayAuditActionTypes.h Defines constants for various array file audit actions. */ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // namespace affymetrix_calvin_array { /*! Initial creation of an array file. */ static const int32_t ARRAY_FILE_CREATEION_ACTION = 1; /*! Attribute update action. */ static const int32_t ARRAY_FILE_ATTRIBUTE_UPDATE_ACTION = 2; /*! Fluidics instrument control action. */ static const int32_t FLUIDICS_INSTRUMENT_CONTROL_ACTION = 3; /*! Scanner instrument control action. */ static const int32_t SCAN_INSTRUMENT_CONTROL_ACTION = 4; }; #endif // _ArrayAuditActionTypes_HEADER_ affxparser/src/fusion/calvin_files/array/src/ArrayData.cpp0000644000175200017520000000242514516003651024752 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/array/src/ArrayData.h" // using namespace affymetrix_calvin_array; /* * Initialize the class. */ ArrayData::ArrayData() { } /* * Clean up. */ ArrayData::~ArrayData() { Clear(); } /* * Clears the member variables. */ void ArrayData::Clear() { physicalArraysAttributes.clear(); userAttributes.clear(); fileId=""; dataTypeId=""; createdStep=NoStep; initialProject=L""; creationDateTime=L""; createdBy=L""; } affxparser/src/fusion/calvin_files/array/src/ArrayData.h0000644000175200017520000000751414516003651024423 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ArrayData_HEADER_ #define _ArrayData_HEADER_ /*! \file ArrayData.h This file provides interfaces to store information in an array file. */ #include "calvin_files/array/src/ArrayAttributes.h" #include "calvin_files/array/src/ArrayId.h" #include "calvin_files/parameter/src/Parameter.h" #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include // namespace affymetrix_calvin_array { /*! This class provides interfaces to store array information. Every accessor returns a non-const reference to the underlying member, so callers both read and modify the stored value through it. */ class ArrayData { public: /*! Constructor */ ArrayData(); /*! Destructor */ ~ArrayData(); protected: /*! A unique identifier for the array set object */ affymetrix_calvin_utilities::AffymetrixGuidType fileId; /*! An identifier for the type of data stored in the file */ affymetrix_calvin_utilities::AffymetrixGuidType dataTypeId; /*! The step in Calvin that created the array set data. */ CreateStep createdStep; /*! The name of the project that initially created the array set data. */ std::wstring initialProject; /*! The date and time of initial creation. */ std::wstring creationDateTime; /*! The user who created the data object. */ std::wstring createdBy; /*! The arrays attributes for the arrays in the set */ ArrayAttributesVector physicalArraysAttributes; /*! The user attributes */ affymetrix_calvin_parameter::ParameterNameValueDefaultRequiredTypeList userAttributes; public: /*! The unique identifier for the array set. * @return The unique identifier for the array set. */ affymetrix_calvin_utilities::AffymetrixGuidType &ArraySetFileIdentifier() { return fileId; } /*! 
The identifier of the type of data stored in the file. * @return The identifier of the type of data. */ affymetrix_calvin_utilities::AffymetrixGuidType &DataTypeIdentifier() { return dataTypeId; } /*! The step in Calvin that created the array set data. * @return The step in Calvin that created the array set data. */ CreateStep &CreatedStep() { return createdStep; } /*! The name of the project that initially created the array set data. * @return The project name. */ std::wstring &InitialProject() { return initialProject; } /*! The date and time of initial creation. * @return The creation date and time. */ std::wstring &CreationDateTime() { return creationDateTime; } /*! The user who created the data object. * @return The user name. */ std::wstring &CreatedBy() { return createdBy; } /*! The arrays attributes. Each array in a set will have its own attributes. * @return The vector of arrays attributes. */ ArrayAttributesVector &PhysicalArraysAttributes() { return physicalArraysAttributes; } /*! The user attributes. * @return The vector of user attributes. */ affymetrix_calvin_parameter::ParameterNameValueDefaultRequiredTypeList &UserAttributes() { return userAttributes; } /*! Clears the member objects. */ void Clear(); }; }; #endif // _ArrayData_HEADER_ affxparser/src/fusion/calvin_files/array/src/ArrayId.h0000644000175200017520000000244514516003651024104 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ArrayId_HEADER_ #define _ArrayId_HEADER_ /*! \file ArrayId.h This file defines the ID for the array file. */ /*! The identifier for the array. */ #define ARRAY_TYPE_IDENTIFIER "affymetrix-calvin-array" /*! The identifier for an array file. */ #define ARRAY_FILE_TYPE_IDENTIFIER "affymetrix-calvin-arrayfile" /*! The identifier for an array file. */ #define ARRAY_SET_FILE_TYPE_IDENTIFIER "affymetrix-calvin-arraysetfile" #endif // _ArrayId_HEADER_ affxparser/src/fusion/calvin_files/array/src/ArrayMedia.cpp0000644000175200017520000000331314516003651025115 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/array/src/ArrayMedia.h" // using namespace affymetrix_calvin_array; /*! The GeneChip cartridge. */ #define MEDIA_TYPE_CARTRIDGE std::wstring(L"Cartridge") /*! A 96 well plate or strip. 
*/ #define MEDIA_TYPE_PLATE_OR_STRIP std::wstring(L"PlateOrStrip") /* * Convert the media type to a string. * Values other than CartridgeMedia and PlateOrStripMedia yield an empty string. */ std::wstring affymetrix_calvin_array::MediaToString(ArrayMedia media) { switch (media) { case CartridgeMedia: return MEDIA_TYPE_CARTRIDGE; break; case PlateOrStripMedia: return MEDIA_TYPE_PLATE_OR_STRIP; break; default: return L""; break; } } /* * Convert the string to media type. * The comparison is an exact match; any string that is not a recognized media name falls back to CartridgeMedia. */ ArrayMedia affymetrix_calvin_array::MediaFromString(const std::wstring &media) { if (media == MEDIA_TYPE_CARTRIDGE) return CartridgeMedia; else if (media == MEDIA_TYPE_PLATE_OR_STRIP) return PlateOrStripMedia; else return CartridgeMedia; } affxparser/src/fusion/calvin_files/array/src/ArrayMedia.h0000644000175200017520000000311614516003651024563 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ArrayMedia_HEADER_ #define _ArrayMedia_HEADER_ /*! \file ArrayMedia.h This file provides a type for defining the media type of an array. */ #include #include // namespace affymetrix_calvin_array { /*! The type of media of the array. */ typedef enum _ArrayMedia { CartridgeMedia, /*! A GeneChip cartridge. */ PlateOrStripMedia /*! A 96 well plate or peg strip. */ } ArrayMedia; /*! 
Converts the media type to a string. * @param media The media type * @return The string representation. */ std::wstring MediaToString(ArrayMedia media); /*! Converts a string to media type. * @param media The string representation. * @return The media */ ArrayMedia MediaFromString(const std::wstring &media); }; #endif // _ArrayMedia_HEADER_ affxparser/src/fusion/calvin_files/array/src/CreateStep.cpp0000644000175200017520000000611314516003651025137 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/array/src/CreateStep.h" // using namespace affymetrix_calvin_array; /*! The none step. */ #define CREATED_STEP_NONE std::wstring(L"None") /*! The array registration step. */ #define CREATED_STEP_ARRAY_REG std::wstring(L"ArrayRegistration") /*! The scan step. */ #define CREATED_STEP_SCAN std::wstring(L"Scanning") /*! The grid alignment step. */ #define CREATED_STEP_GRID std::wstring(L"Gridding") /*! The cel analysis step. */ #define CREATED_STEP_CEL std::wstring(L"CELAnalysis") /*! Other undefined step. */ #define CREATED_STEP_OTHER std::wstring(L"Other") /*! From step. */ #define CREATED_STEP_FROM std::wstring(L"From") /*! Job order step. 
*/ #define CREATED_STEP_JOB_ORDER std::wstring(L"JobOrderServer") /*! File indexer step. */ #define CREATED_STEP_FILE_INDEXER std::wstring(L"FileIndexer") /* * Convert the step type to a string. */ std::wstring affymetrix_calvin_array::CreateStepToString(CreateStep step) { switch (step) { case NoStep: return CREATED_STEP_NONE; break; case ArrayRegistrationStep: return CREATED_STEP_ARRAY_REG; break; case ScanningStep: return CREATED_STEP_SCAN; break; case GriddingStep: return CREATED_STEP_GRID; break; case CELAnalysisStep: return CREATED_STEP_CEL; break; case OtherStep: return CREATED_STEP_OTHER; break; case FromStep: return CREATED_STEP_FROM; break; case JobOrderServerStep: return CREATED_STEP_JOB_ORDER; break; case FileIndexerStep: return CREATED_STEP_FILE_INDEXER; break; default: return L""; break; } } /* * Convert a string to step type. */ CreateStep affymetrix_calvin_array::CreateStepFromString(const std::wstring &step) { if (step == CREATED_STEP_NONE) return NoStep; else if (step == CREATED_STEP_ARRAY_REG) return ArrayRegistrationStep; else if (step == CREATED_STEP_SCAN) return ScanningStep; else if (step == CREATED_STEP_GRID) return GriddingStep; else if (step == CREATED_STEP_CEL) return CELAnalysisStep; else if (step == CREATED_STEP_OTHER) return OtherStep; else if (step == CREATED_STEP_FROM) return FromStep; else if (step == CREATED_STEP_JOB_ORDER) return JobOrderServerStep; else if (step == CREATED_STEP_FILE_INDEXER) return FileIndexerStep; else return OtherStep; } affxparser/src/fusion/calvin_files/array/src/CreateStep.h0000644000175200017520000000355214516003651024610 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CreateStep_HEADER_ #define _CreateStep_HEADER_ /*! \file CreateStep.h This file provides a type for defining a step that created an array object. */ #include #include // namespace affymetrix_calvin_array { /*! Defines the steps that can create array data. */ typedef enum _CreateStep { NoStep, /*! No step. */ ArrayRegistrationStep, /*! Array registration. */ ScanningStep, /*! Scanning. */ GriddingStep, /*! Grid analysis. */ CELAnalysisStep, /*! CEL file analysis. */ FromStep, /*! From. */ JobOrderServerStep, /*! Job order. */ FileIndexerStep, /*! File indexer. */ OtherStep /*! Any other step. */ } CreateStep; /*! Converts the step type to a string. * @param step The create step * @return The string representation, or an empty string when the value is not one of the enumerators. */ std::wstring CreateStepToString(CreateStep step); /*! Converts a string to step type. * @param step The string representation. * @return The create step; OtherStep when the string is not recognized. */ CreateStep CreateStepFromString(const std::wstring &step); } #endif // _CreateStep_HEADER_ affxparser/src/fusion/calvin_files/array/src/PATAssignment.cpp0000644000175200017520000000442214516003651025556 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/array/src/PATAssignment.h" // using namespace affymetrix_calvin_array; /*! No probe array type assignment. */ #define PAT_ASSIGNMENT_NONE std::wstring(L"None") /*! Affy barcode selected probe array type. */ #define PAT_ASSIGNMENT_BARCODE std::wstring(L"AffyBarcode") /*! User selected probe array type. */ #define PAT_ASSIGNMENT_USER_SELECTED std::wstring(L"UserSelected") /*! Other assignment. */ #define PAT_ASSIGNMENT_OTHER std::wstring(L"Other") /* * Convert the probe array assignment type to a string. */ std::wstring affymetrix_calvin_array::PATAssignmentMethodToString(PATAssignmentMethod pat) { switch (pat) { case NoAssignment: return PAT_ASSIGNMENT_NONE; break; case AffyBarcodeAssignment: return PAT_ASSIGNMENT_BARCODE; break; case UserSelectedAssignment: return PAT_ASSIGNMENT_USER_SELECTED; break; case OtherAssignment: return PAT_ASSIGNMENT_OTHER; break; default: return L""; break; } } /* * Convert the string to the probe array assignment type. 
*/ PATAssignmentMethod affymetrix_calvin_array::PATAssignmentMethodFromString(const std::wstring &pat) { if (pat == PAT_ASSIGNMENT_NONE) { return NoAssignment; } else if (pat == PAT_ASSIGNMENT_BARCODE) { return AffyBarcodeAssignment; } else if (pat == PAT_ASSIGNMENT_USER_SELECTED) { return UserSelectedAssignment; } else if (pat == PAT_ASSIGNMENT_OTHER) { return OtherAssignment; } else { return NoAssignment; } } affxparser/src/fusion/calvin_files/array/src/PATAssignment.h0000644000175200017520000000351514516003651025225 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _PATAssignment_HEADER_ #define _PATAssignment_HEADER_ /*! \file PATAssignment.h This file provides a type to define the method the probe array type was assigned. */ #include #include // namespace affymetrix_calvin_array { /*! The method the probe array type was assigned. */ typedef enum _PATAssignmentMethod { NoAssignment, /*! Unknown */ AffyBarcodeAssignment, /*! User entered an Affy barcode. */ UserSelectedAssignment, /*! User selected. */ OtherAssignment /*! Other method. */ } PATAssignmentMethod; /*! Converts the probe array assignment type to a string. 
* @param pat The probe array assignment * @return The string representation. */ std::wstring PATAssignmentMethodToString(PATAssignmentMethod pat); /*! Converts a string to the probe array assignment type. * @param pat The string representation. * @return The probe array assignment */ PATAssignmentMethod PATAssignmentMethodFromString(const std::wstring &pat); }; #endif // _PATAssignment_HEADER_ affxparser/src/fusion/calvin_files/data/0000755000175200017520000000000014516003651021377 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/data/src/0000755000175200017520000000000014516022540022164 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/data/src/AllelePeaks.h0000644000175200017520000000303214516003651024517 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AllelePeaks_HEADER_ #define _AllelePeaks_HEADER_ /*! \file AllelePeaks.h This file provides types to hold allele peak results. */ #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // namespace affymetrix_calvin_data { /*! 
Holds the allele peak results */ typedef struct _AllelePeaks { /*! The name of the marker. */ std::string name; /*! The chromosome value. */ u_int8_t chr; /*! The physical position. */ u_int32_t position; /*! The allele peaks. */ std::vector peaks; } AllelePeaks; } #endif affxparser/src/fusion/calvin_files/data/src/CDFData.cpp0000644000175200017520000003171414516003651024066 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CDFData.h" // #include "calvin_files/data/src/DataException.h" #include "calvin_files/data/src/DataSetHeader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_parameter; CDFData::CDFData() { tocDataSet = 0; Clear(); } CDFData::CDFData(const std::string &filename) { tocDataSet = 0; Clear(); SetFilename(filename); } CDFData::~CDFData() { Clear(); } void CDFData::Clear() { genericData.Header().Clear(); setTocMetaData = false; mode = NoMode; curIndex = 0; curGroupFilePos = 0; if (tocDataSet) { tocDataSet->Delete(); tocDataSet = 0; } nameToFilePosMap.clear(); } int32_t CDFData::GetFormatVersion() { return 0; } void 
CDFData::SetArrayRows(u_int32_t value) { SetUInt32ToGenericHdr(CDF_ROWS_PARAM, value); } u_int32_t CDFData::GetArrayRows() { return GetUInt32FromGenericHdr(CDF_ROWS_PARAM); } void CDFData::SetArrayCols(u_int32_t value) { SetUInt32ToGenericHdr(CDF_COLS_PARAM, value); } u_int32_t CDFData::GetArrayCols() { return GetUInt32FromGenericHdr(CDF_COLS_PARAM); } void CDFData::SetRefSequence(const std::string &seq) { GenericDataHeader* gPtr = genericData.Header().GetGenericDataHdr(); ParameterNameValueType paramType; paramType.SetName(CDF_REFSEQ_PARAM); paramType.SetValueAscii(seq); gPtr->AddNameValParam(paramType); } std::string CDFData::GetRefSequence() { std::string result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(CDF_REFSEQ_PARAM, paramType)) { result = paramType.GetValueAscii(); } return result; } void CDFData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } std::string CDFData::GetFilename() { return genericData.Header().GetFilename(); } int32_t CDFData::GetProbeSetCnt() { return GetUInt32FromGenericHdr(CDF_SETS_PARAM); } void CDFData::SetProbeSetCnt(u_int32_t cnt, CDFDataTypeIds type) { SetDataTypeId(type, cnt); SetUInt32ToGenericHdr(CDF_SETS_PARAM, cnt); } void CDFData::SetDataTypeId(CDFDataTypeIds p, int32_t probesetCnt) { GenericDataHeader* gPtr = genericData.Header().GetGenericDataHdr(); std::string typeId; std::wstring groupName; if(p == Expression) { typeId = AFFY_EXPR_PS; groupName = CDF_PS_GROUP_LBL; } else if(p == Genotyping) { typeId = AFFY_GENO_PS; groupName = CDF_PS_GROUP_LBL; } else if(p == Tag) { typeId = AFFY_TAG_PS; groupName = CDF_PS_GROUP_LBL; } else if(p == Resequencing) { typeId = AFFY_RESEQ_PS; groupName = CDF_PS_GROUP_LBL; } else { typeId = AFFY_CNTRL_PS; groupName = CDF_QC_GROUP_LBL; } gPtr->SetFileTypeId(typeId); CreateDataGroups(groupName, probesetCnt); } std::string CDFData::GetDataTypeId() { GenericDataHeader* gPtr = 
genericData.Header().GetGenericDataHdr(); return gPtr->GetFileTypeId(); } /* Creates the contents (table of contents) group named p, followed by one group header per probe set. */ void CDFData::CreateDataGroups(const std::wstring& p, int32_t probesetCnt) { CreateContentsGroup(p, probesetCnt); CreateProbeSetGroups(probesetCnt); } /* Adds a data group header named p holding a single data set of probesetCnt rows with one unicode column (width MAX_CDF_PROBE_SET_NAME_LENGTH) and one int column. */ void CDFData::CreateContentsGroup(const std::wstring& p, int32_t probesetCnt) { DataGroupHeader c(p); DataSetHeader dp; dp.SetRowCnt(probesetCnt); dp.SetName(p); dp.AddUnicodeColumn(L"", MAX_CDF_PROBE_SET_NAME_LENGTH); dp.AddIntColumn(L""); c.AddDataSetHdr(dp); genericData.Header().AddDataGroupHdr(c); } /* Appends probesetCnt default-constructed data group headers to the file header. */ void CDFData::CreateProbeSetGroups(int32_t probesetCnt) { for(int i = 0; i < probesetCnt; i++) { DataGroupHeader c; genericData.Header().AddDataGroupHdr(c); } } /* Looks up the named parameter in the generic data header and returns its unsigned 32-bit value; returns 0 when the parameter is not found. The accumulator is unsigned to match both GetValueUInt32() and the return type, so large values never pass through a signed intermediate. */ u_int32_t CDFData::GetUInt32FromGenericHdr(const std::wstring& name) { u_int32_t result = 0; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueUInt32(); } return result; } /* Stores value as an unsigned 32-bit name/value parameter in the generic data header. */ void CDFData::SetUInt32ToGenericHdr(const std::wstring& name, u_int32_t value) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueUInt32(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } void CDFData::PrepareForSequentialAccess() { // set the mode mode = SequentialMode; // skip-over the first DataGroup DataGroup dg = genericData.DataGroup(genericData.Header().GetFirstDataGroupFilePos()); curGroupFilePos = dg.Header().GetNextGroupPos(); // initialize the curPos curIndex = 0; } void CDFData::PrepareForAccessByProbeSetIndex() { // set the mode mode = ProbeSetIndexMode; // Open the first DataGroup DataGroup dg = genericData.DataGroup(genericData.Header().GetFirstDataGroupFilePos()); // Open the first DataSet if (tocDataSet) tocDataSet->Delete(); tocDataSet = dg.DataSet(0); if (tocDataSet) tocDataSet->Open(); } void CDFData::PrepareForAccessByProbeSetName() { // set the mode mode = ProbeSetNameMode; // Open the first 
DataGroup DataGroup dg = genericData.DataGroup(genericData.Header().GetFirstDataGroupFilePos()); // Open the first DataSet if (tocDataSet) tocDataSet->Delete(); tocDataSet = dg.DataSet(0); if (tocDataSet) { if (tocDataSet->Open()) { // build the name-file position map int32_t rows = tocDataSet->Rows(); std::wstring probeSetName; u_int32_t filePos = 0; for (int32_t row = 0; row < rows; ++row) { tocDataSet->GetData(row, TOCProbeSetNameCol, probeSetName); tocDataSet->GetData(row, TOCFilePosCol, filePos); nameToFilePosMap[probeSetName] = filePos; } } } } /* Fills info with the data group of the probe set at the given index. Throws ProbeSetNotFoundException when the file is a control (QC) file or when index is negative or not below GetNumDataGroups()-1 (the first group is the table of contents). In SequentialMode only index == curIndex is accepted and the cursor then advances to the next group; in ProbeSetIndexMode the file position comes from the TOC data set; any other mode, or an out-of-order sequential request, throws CDFAccessNotSupportedByModeException. The negative-index guard mirrors GetProbeSetName and keeps a negative row from reaching the TOC lookup. */ void CDFData::GetProbeSetInformation(int32_t index, CDFProbeSetInformation& info) { if (GetDataTypeId() == AFFY_CNTRL_PS) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (index < 0 || index >= genericData.Header().GetNumDataGroups()-1) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (mode == SequentialMode) { if (index == curIndex) { DataGroup dg = genericData.DataGroup(curGroupFilePos); info.SetDataGroup(dg); // Move to the next data group ++curIndex; curGroupFilePos = dg.Header().GetNextGroupPos(); } else { CDFAccessNotSupportedByModeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } else if (mode == ProbeSetIndexMode) { u_int32_t filePos = GetTOCFilePosByIndex(index); DataGroup dg = genericData.DataGroup(filePos); info.SetDataGroup(dg); } else { CDFAccessNotSupportedByModeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } void 
CDFData::GetProbeSetInformation(const std::wstring& probeSetName, CDFProbeSetInformation& info) { /* Name-based lookup: rejected for control (QC) files and for any mode other than ProbeSetNameMode. */ if (GetDataTypeId() == AFFY_CNTRL_PS) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (mode != ProbeSetNameMode) { CDFAccessNotSupportedByModeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } u_int32_t filePos = GetTOCFilePosByName(probeSetName); DataGroup dg = genericData.DataGroup(filePos); info.SetDataGroup(dg); } /* Fills info with the first data set of the QC probe set group at the given index. Throws ProbeSetNotFoundException when the file is not a control (QC) file or when index is negative or not below GetNumDataGroups()-1. In SequentialMode only index == curIndex is accepted and the cursor then advances; in ProbeSetIndexMode the file position comes from the TOC data set; otherwise CDFAccessNotSupportedByModeException is thrown. The negative-index guard mirrors GetProbeSetName and keeps a negative row from reaching the TOC lookup. */ void CDFData::GetQCProbeSetInformation(int32_t index, CDFQCProbeSetInformation& info) { if (GetDataTypeId() != AFFY_CNTRL_PS) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (index < 0 || index >= genericData.Header().GetNumDataGroups()-1) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (mode == SequentialMode) { if (index == curIndex) { DataGroup dg = genericData.DataGroup(curGroupFilePos); DataSet* ds = dg.DataSet(0); info.SetDataSet(ds); // Move to the next data group ++curIndex; curGroupFilePos = dg.Header().GetNextGroupPos(); } else { CDFAccessNotSupportedByModeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } else if (mode == ProbeSetIndexMode) { u_int32_t filePos = GetTOCFilePosByIndex(index); DataGroup dg = genericData.DataGroup(filePos); DataSet* ds = dg.DataSet(0); info.SetDataSet(ds); } else { 
CDFAccessNotSupportedByModeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } void CDFData::GetQCProbeSetInformation(const std::wstring& probeSetName, CDFQCProbeSetInformation& info) { if (GetDataTypeId() != AFFY_CNTRL_PS) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (mode != ProbeSetNameMode) { CDFAccessNotSupportedByModeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } u_int32_t filePos = GetTOCFilePosByName(probeSetName); DataGroup dg = genericData.DataGroup(filePos); DataSet* ds = dg.DataSet(0); info.SetDataSet(ds); } u_int32_t CDFData::GetTOCFilePosByIndex(int32_t index) { if (tocDataSet == 0 || tocDataSet->IsOpen() == false) { DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } u_int32_t filePos = (u_int32_t)-1; tocDataSet->GetData(index, TOCFilePosCol, filePos); return filePos; } u_int32_t CDFData::GetTOCFilePosByName(const std::wstring& name) { if (tocDataSet == 0 || tocDataSet->IsOpen() == false) { DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } std::map::iterator ii = nameToFilePosMap.find(name); if (ii == nameToFilePosMap.end()) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please 
Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } else return ii->second; } std::wstring CDFData::GetProbeSetName(int32_t index) { // Access the TOC data set if (tocDataSet == 0) { DataGroup dg = genericData.DataGroup(genericData.Header().GetFirstDataGroupFilePos()); tocDataSet = dg.DataSet(0); if (tocDataSet == 0) { DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } // Open the TOC data set if not open if (tocDataSet->IsOpen() == false) { tocDataSet->Open(); if (tocDataSet->IsOpen() == false) { DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } if (index < 0 || index >= tocDataSet->Rows()) { ProbeSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } std::wstring probeSetName; tocDataSet->GetData(index, TOCProbeSetNameCol, probeSetName); return probeSetName; } affxparser/src/fusion/calvin_files/data/src/CDFData.h0000644000175200017520000002261714516003651023535 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! file CDFData.h This file contains classes and definitions for storing CDF file data. */ #ifndef _CDFData_HEADER_ #define _CDFData_HEADER_ #include "calvin_files/data/src/CDFProbeSetInformation.h" #include "calvin_files/data/src/CDFQCProbeSetInformation.h" #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parameter/src/AffymetrixParameterConsts.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! The maximum probe set name length. */ #define MAX_CDF_PROBE_SET_NAME_LENGTH 32 /*! The group name for the control probe set types. */ #define CDF_QC_GROUP_LBL std::wstring(L"Control Probe Set Types") /*! The group name for the probe set names. */ #define CDF_PS_GROUP_LBL std::wstring(L"Probe Set Names") /*! The parameter name for the number of feature rows on the array. */ #define CDF_ROWS_PARAM std::wstring(L"ROWS") /*! The parameter name for the number of feature columns on the array. */ #define CDF_COLS_PARAM std::wstring(L"COLS") /*! The parameter name for the number of probe sets on the array. */ #define CDF_SETS_PARAM std::wstring(L"SETS") /*! The parameter name for the reference sequence (for resequencing arrays). */ #define CDF_REFSEQ_PARAM std::wstring(L"REFSEQ") /*! The parameter name for the unit type value. */ #define CDF_UNIT_TYPE std::wstring(L"Unit Type") /*! The parameter name for the direction value. 
*/ #define CDF_DIRECTION std::wstring(L"Direction") /*! The parameter name for the number of atoms (probe pairs or quartets) value. */ #define CDF_ATOMS std::wstring(L"Atoms") /*! The parameter name for the number of blocks value. */ #define CDF_BLOCKS std::wstring(L"Blocks") /*! The parameter name for the number of cells value. */ #define CDF_CELLS std::wstring(L"Cells") /*! The parameter name for the probe set number value. */ #define CDF_PROBE_SET_NUMBER std::wstring(L"Probe Set Number") /*! The parameter name for the number of cells per atom value. */ #define CDF_CELLS_PER_ATOM std::wstring(L"Cells Per Atom") /*! This class stores data from a Calvin CDF file. */ class CDFData { public: /*! Constructor */ CDFData(); /*! Constructor with file name as input. */ CDFData(const std::string &filename); /*! Destructor */ ~CDFData(); public: /*! Clear member variables */ void Clear(); /*! Set filename. * @param p The name of the file. */ void SetFilename(const std::string &p); /*! Get filename. * @return The file name. */ std::string GetFilename(); /*! Get format version. * @return The CDF format version. */ int32_t GetFormatVersion(); /*! Gets the probe set count. * @return The number of probe sets. */ int32_t GetProbeSetCnt(); /*! The number of probe sets. * @return The number of probe sets. */ void SetProbeSetCnt(u_int32_t cnt, CDFDataTypeIds type); /*! Get the name of a probe set name based on index. * Use this method to get the probe set name for either a regular or QC probe set. * @param index The index to the probe set. Must be between 0 and the number of probe sets - 1. */ std::wstring GetProbeSetName(int32_t index); /*! Sets the number of rows of features in the array. * @param value The row count. */ void SetArrayRows(u_int32_t value); /*! The number of rows of features in the array. * @return The row count. */ u_int32_t GetArrayRows(); /*! Sets the number of columns of features in the array. * @param value The column count. 
*/ void SetArrayCols(u_int32_t value); /*! The number of columns of features in the array. * @return The column count. */ u_int32_t GetArrayCols(); /*! Sets the reference sequence for a resequencing array. * @param seq The reference sequence. */ void SetRefSequence(const std::string &seq); /*! Gets the reference sequence. * @return The reference sequence. */ std::string GetRefSequence(); /*! Gets the data type identifier. * @return The data type identifier. */ std::string GetDataTypeId(); /*! Gets the file header object. * @return The file header object. */ FileHeader* GetFileHeader() { return &genericData.Header(); } /*! Gets the generic data header. * @return The generic data header. */ GenericData& GetGenericData() { return genericData; } /*! Gets the information about a probe set. * @param index The index to the probe set. Must be between 0 and the number of probe sets - 1. * @param info The probe set information. * @exception CDFAccessNotSupportedByModeException * @exception DataSetNotOpenException */ void GetProbeSetInformation(int32_t index, CDFProbeSetInformation& info); /*! Gets the information about a probe set. * @param probeSetName The name of the probe set. * @param info The probe set information. * @exception CDFAccessNotSupportedByModeException * @exception DataSetNotOpenException * @exception ProbeSetNotFoundException */ void GetProbeSetInformation(const std::wstring& probeSetName, CDFProbeSetInformation& info); /*! Gets the information about a QC probe set. * @param index The index of the QC probe set. * @param info The probe set information. * @exception CDFAccessNotSupportedByModeException * @exception DataSetNotOpenException */ void GetQCProbeSetInformation(int32_t index, CDFQCProbeSetInformation& info); /*! Gets the information about a QC probe set. * @param probeSetName The name of the QC probe set. * @param info The probe set information. 
* @exception CDFAccessNotSupportedByModeException * @exception DataSetNotOpenException * @exception ProbeSetNotFoundException */ void GetQCProbeSetInformation(const std::wstring& probeSetName, CDFQCProbeSetInformation& info); private: /*! The type of access for parsing the file. */ enum AccessMode { NoMode, /*! No access mode. */ SequentialMode, /*! Sequencial access to probe set data. */ ProbeSetIndexMode, /*! Optimized for access given a probe set index. */ ProbeSetNameMode /*! Optimized for access given a probe set name. */ }; /*! The columns in the probe set name set. */ enum TOCColumns { TOCProbeSetNameCol, /*! The probe set name column. */ TOCFilePosCol /*! The file position column. */ }; /*! Create the data groups. * @param p The group name. * @param probesetCnt The number of probe sets. */ void CreateDataGroups(const std::wstring& p, int32_t probesetCnt); /*! Create the table of contents group indexes. * @param p The group name. * @param probesetCnt The number of probe sets. */ void CreateContentsGroup(const std::wstring& p, int32_t probesetCnt); /*! Create a data group for every probe set. * @param probesetCnt The number of probe sets. */ void CreateProbeSetGroups(int32_t probesetCnt); /*! Sets the data type identifier. * @param p The probe set type. * @param probesetCnt The number of probe sets. */ void SetDataTypeId(CDFDataTypeIds p, int32_t probesetCnt); /*! Sets an integer parameter to the header. * @param name The parameter name. * @param value The parameter value. */ void SetUInt32ToGenericHdr(const std::wstring& name, u_int32_t value); /*! Gets an integer parameter from the header given the parameter name. * @param name The parameter name. */ u_int32_t GetUInt32FromGenericHdr(const std::wstring& name); /*! Prepares the object for sequencial access. */ void PrepareForSequentialAccess(); /*! Prepares the object for probe set index access. */ void PrepareForAccessByProbeSetIndex(); /*! Prepares the object for probe set name access. 
*/ void PrepareForAccessByProbeSetName(); /*! Gets the table of contents file position by the probe set index. * @param index The probe set index. * @exception DataSetNotOpenException */ u_int32_t GetTOCFilePosByIndex(int32_t index); /*! Gets the table of contents file position by probe set name. * @param name The probe set name. * @exception DataSetNotOpenException * @exception ProbeSetNotFoundException */ u_int32_t GetTOCFilePosByName(const std::wstring& name); private: /*! The generic data header. */ GenericData genericData; /*! */ bool setTocMetaData; /*! Access mode */ AccessMode mode; /*! sequential index */ int32_t curIndex; /*! file position of the current DataGroup */ u_int32_t curGroupFilePos; /*! pointer to the table of contents DataSet */ DataSet* tocDataSet; /*! map of probe set name to file position*/ std::map nameToFilePosMap; /*! Friend to the reader. */ friend class CDFFileReader; }; } #endif // _CDFData_HEADER_ affxparser/src/fusion/calvin_files/data/src/CDFDataTypes.h0000644000175200017520000000450014516003651024551 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! 
\file CDFDataTypes.h This file defines the type used by the CDF data headers.
 */

#ifndef _CDFDataTypes_HEADER_
#define _CDFDataTypes_HEADER_

namespace affymetrix_calvin_io
{

/*! The probe set (unit) types.
 * The enumerators carry no explicit values, so their numeric values follow
 * declaration order; code elsewhere in this package casts an integer header
 * value to this enum, so the order must not be changed.
 */
enum CDFDataTypeIds
{
    Expression,         /*!< An expression probe set. */
    Genotyping,         /*!< A genotyping probe set. */
    Tag,                /*!< A tag (also called universal) probe set. */
    Resequencing,       /*!< A resequencing probe set. */
    Control,            /*!< A control probe set. */
    CopyNumber,         /*!< A copy number probe set. */
    GenotypeControl,    /*!< A genotype control probe set. */
    ExpressionControl,  /*!< An expression control probe set. */
    Marker,             /*!< A polymorphic marker probe set. */
    MultichannelMarker  /*!< A multichannel marker probe set. */
};

/*! The direction of the target sequence the probes are designed to interrogate. */
enum DirectionType
{
    /*! No direction specified */
    ProbeNoDirection,

    /*! Sense */
    ProbeSenseDirection,

    /*! Anti sense */
    ProbeAntiSenseDirection,

    /*! Either */
    ProbeEitherDirection
};

/*! Defines the type of probe replication of a group */
enum ReplicationType
{
    /*! Unspecified replication type */
    UnknownProbeRepType,

    /*! Different
     * All probes in the probe group have different sequences
     */
    DifferentProbeRepType,

    /*! Mixed
     * Some probes in the probe group have identical sequences
     */
    MixedProbeRepType,

    /*! Identical
     * All probes in the probe group have identical sequences
     */
    IdenticalProbeRepType
};

}

#endif
affxparser/src/fusion/calvin_files/data/src/CDFProbeGroupInformation.cpp0000644000175200017520000000566714516003651027517 0ustar00biocbuildbiocbuild
////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CDFProbeGroupInformation.h" // #include "calvin_files/data/src/CDFData.h" #include "calvin_files/data/src/CDFProbeInformation.h" #include "calvin_files/data/src/DataSet.h" // using namespace affymetrix_calvin_io; CDFProbeGroupInformation::CDFProbeGroupInformation() { dataSet = 0; } CDFProbeGroupInformation::~CDFProbeGroupInformation() { if (dataSet) { dataSet->Close(); dataSet->Delete(); } } void CDFProbeGroupInformation::SetDataSet(DataSet* ds) { if (dataSet) { dataSet->Close(); dataSet->Delete(); } dataSet = ds; dataSet->Open(); const DataSetHeader& dsh = ds->Header(); type = Expression; direction = 0; listCnt = 0; cellCnt = 0; wobbleSituation = 0; alleleCode = 0; channel = 0; repType = 0; probeSetNumber = 0; cellsPerList = 0; ParameterNameValueType nvt; // UnitType if (dsh.FindNameValParam(CDF_UNIT_TYPE, nvt)) type = (CDFDataTypeIds)nvt.GetValueUInt8(); // Direction if (dsh.FindNameValParam(CDF_DIRECTION, nvt)) direction = nvt.GetValueUInt8(); // ProbeSetNumber if (dsh.FindNameValParam(CDF_PROBE_SET_NUMBER, nvt)) probeSetNumber = nvt.GetValueUInt32(); // CellsPerList (cells per atom) if (dsh.FindNameValParam(CDF_CELLS_PER_ATOM, nvt)) cellsPerList = nvt.GetValueUInt8(); // Wobble situation // if (dsh.FindNameValParam(CDF_WOBBLE_SITUATION, nvt)) // wobbleSituation = nvt.GetValueUInt16(); // Allele code // if (dsh.FindNameValParam(CDF_ALLELE, nvt)) // alleleCode = nvt.GetValueUInt16(); // Channel // if 
(dsh.FindNameValParam(CDF_CHANNEL, nvt)) // channel = nvt.GetValueUInt8(); // Probe replication type // if (dsh.FindNameValParam(CDF_REP_TYPE, nvt)) // repType = nvt.GetValueUInt8(); // List (atom) if (dsh.FindNameValParam(CDF_ATOMS, nvt)) listCnt = nvt.GetValueUInt32(); // Cells if (dsh.FindNameValParam(CDF_CELLS, nvt)) cellCnt = nvt.GetValueUInt32(); } /* * Get information for the probe by index */ void CDFProbeGroupInformation::GetCell(int32_t cell_index, CDFProbeInformation& info) { if (cell_index < 0 || cell_index >= dataSet->Rows()) return; info.CopyFromDataSet(dataSet, cell_index); } affxparser/src/fusion/calvin_files/data/src/CDFProbeGroupInformation.h0000644000175200017520000000633314516003651027153 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFProbeGroupInformation_HEADER_ #define _CDFProbeGroupInformation_HEADER_ /*! \file CDFProbeGroupInformation.h This file defines the CDFProbeGroupInformation class. */ #ifdef _MSC_VER #include #endif #include "calvin_files/data/src/CDFProbeSetInformation.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // namespace affymetrix_calvin_io { class CDFProbeInformation; /*! 
Class that exposes the CDF probe group information */ class CDFProbeGroupInformation { public: /*! Constructor */ CDFProbeGroupInformation(); /*! Desctructor */ ~CDFProbeGroupInformation(); /*! Get the probe set direction */ DirectionType GetDirection() const { return (DirectionType)direction; } /*! Get the number of lists (atoms) in the probe group */ u_int32_t GetNumLists() const { return listCnt; } /*! Get the number of cells in the probe group */ u_int32_t GetNumCells() const { return cellCnt; } /*! Get the number of cells per list (atom) */ u_int8_t GetNumCellsPerList() const { return (u_int8_t)cellsPerList; } /*! Get the probe group name = DataPlane name */ std::wstring GetName() const { if (dataSet) return dataSet->Header().GetName(); else return L""; } /*! Get the wobble situation */ u_int16_t GetWobbleSituation() const { return wobbleSituation; } /*! Get the allele */ u_int16_t GetAlleleCode() const { return alleleCode; } /*! Get the channel */ u_int8_t GetChannel() const { return channel; } /*! Get the probe replication type */ ReplicationType GetRepType() const { return (ReplicationType)repType; } /*! Get information for the probe by index */ void GetCell(int32_t cell_index, CDFProbeInformation& info); protected: void SetDataSet(DataSet* ds); protected: /*! DataSet pointer */ DataSet* dataSet; /*! Probe set type - u_int8_t*/ CDFDataTypeIds type; /*! Direction */ u_int8_t direction; /*! Number of lists (atoms) in the probe group */ u_int32_t listCnt; /*! Number of cells in the probe group */ u_int32_t cellCnt; /*! Wobble situation */ u_int16_t wobbleSituation; /*! Allele */ u_int16_t alleleCode; /*! Channel */ u_int8_t channel; /*! Probe replication type */ u_int8_t repType; /*! Probe set number - same as ProbeSetNumber in C++ */ u_int32_t probeSetNumber; /*! 
Cells per list (atom) - same as NumCellsPerList in C++ */ u_int8_t cellsPerList; friend class CDFProbeSetInformation; }; } #endif // _CDFProbeGroupInformation_HEADER_ affxparser/src/fusion/calvin_files/data/src/CDFProbeInformation.cpp0000644000175200017520000000313114516003651026462 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CDFProbeInformation.h" // using namespace affymetrix_calvin_io; /* * Constructor */ CDFProbeInformation::CDFProbeInformation() { xCoord = 0; yCoord = 0; listIndex = 0; indexPos = 0; baseProbe = 0; baseTarget = 0; probeLength = 0; probeGrouping = 0; } void CDFProbeInformation::CopyFromDataSet(DataSet* dataSet, int32_t row) { dataSet->GetData(row, XCoordCol, xCoord); dataSet->GetData(row, YCoordCol, yCoord); dataSet->GetData(row, ListIndexCol, listIndex); dataSet->GetData(row, IndexPosCol, indexPos); dataSet->GetData(row, BaseProbeCol, baseProbe); dataSet->GetData(row, BaseTargetCol, baseTarget); // dataSet->GetData(row, ProbeLengthCol, probeLength); // dataSet->GetData(row, ProbeGroupingCol, probeGrouping); } 
affxparser/src/fusion/calvin_files/data/src/CDFProbeInformation.h0000644000175200017520000000570414516003651026137 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFProbeInformation_HEADER_ #define _CDFProbeInformation_HEADER_ /*! \file CDFProbeInformation.h This file defines the CDFProbeInformation class. */ #ifdef _MSC_VER #include #endif #include "calvin_files/data/src/DataSet.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // namespace affymetrix_calvin_io { /*! Class that exposes the CDF probe information */ class CDFProbeInformation { public: /*! Constructor */ CDFProbeInformation(); /*! Get the list index (atom number) of the probe */ u_int32_t GetListIndex() const { return listIndex; } /*! Get the exon position of the probe (for reseq)*/ int GetExpos() const { return indexPos; } /*! Get the x-coordinate of the probe */ u_int16_t GetX() const { return xCoord; } /*! Get the y-coordinate of the probe */ u_int16_t GetY() const { return yCoord; } /*! Get the base of the probe at the substitution position */ int8_t GetPBase() const { return baseProbe; } /*! 
Get the base of the target at the interrogation position */ int8_t GetTBase() const { return baseTarget; } /*! Get the probe length */ u_int16_t GetProbeLength() const { return probeLength; } /*! Get the probe grouping */ u_int16_t GetProbeGrouping() const { return probeGrouping; } protected: enum DataSetColumns { XCoordCol, YCoordCol, ListIndexCol, IndexPosCol, BaseProbeCol, BaseTargetCol, ProbeLenCol, ProbeGroupCol }; /*! */ void CopyFromDataSet(DataSet* ds, int32_t row); protected: /*! x-coordinate of the probe, m_X in C++ */ u_int16_t xCoord; /*! y-coordinate of the probe, m_Y in C++ */ u_int16_t yCoord; /*! list index (atom number), m_ListIndex in C++ */ u_int32_t listIndex; /*! exon position for reseq, m_Expos in C++ */ u_int32_t indexPos; /*! Base of the probe at the substitution position, m_PBase in C++ */ int8_t baseProbe; /*! Base of the probe at the interrogation position, m_TBase in C++ */ int8_t baseTarget; /*! Probe length, m_ProbeLength in C++ */ u_int16_t probeLength; /*! Probe grouping, m_ProbeGrouping in C++ */ u_int16_t probeGrouping; friend class CDFProbeGroupInformation; }; } #endif // _CDFProbeInformation_HEADER_ affxparser/src/fusion/calvin_files/data/src/CDFProbeSetInformation.cpp0000644000175200017520000000553114516003651027144 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CDFProbeSetInformation.h" // #include "calvin_files/data/src/CDFData.h" #include "calvin_files/data/src/CDFProbeGroupInformation.h" #include "calvin_files/data/src/DataSet.h" // using namespace affymetrix_calvin_io; /* * Constructor */ CDFProbeSetInformation::CDFProbeSetInformation() { dataGroup = 0; type = Expression; direction = 0; groupCnt = 0; probeSetNumber = 0; cellsPerList = 0; listCnt = 0; cellCnt = 0; } /* * Destructor */ CDFProbeSetInformation::~CDFProbeSetInformation() { delete dataGroup; } void CDFProbeSetInformation::SetDataGroup(DataGroup& dg) { if (dataGroup) delete dataGroup; dataGroup = new DataGroup(dg); const DataGroupHeader& dgh = dataGroup->Header(); type = Expression; direction = 0; groupCnt = 0; probeSetNumber = 0; cellsPerList = 0; listCnt = 0; cellCnt = 0; // Get the first DataSetHeader const DataSetHeader& dsh = dgh.GetDataSetConst(0); ParameterNameValueType nvt; // UnitType if (dsh.FindNameValParam(CDF_UNIT_TYPE, nvt)) type = (CDFDataTypeIds)nvt.GetValueUInt8(); // Direction if (dsh.FindNameValParam(CDF_DIRECTION, nvt)) direction = nvt.GetValueUInt8(); // ProbeGroup count groupCnt = dgh.GetDataSetCnt(); // ProbeSetNumber if (dsh.FindNameValParam(CDF_PROBE_SET_NUMBER, nvt)) probeSetNumber = nvt.GetValueUInt32(); // CellsPerList if (dsh.FindNameValParam(CDF_CELLS_PER_ATOM, nvt)) cellsPerList = nvt.GetValueUInt8(); // Sum up the totals for (int32_t group = 0; group < (int32_t)groupCnt; ++group) { if (dsh.FindNameValParam(CDF_ATOMS, nvt)) { listCnt += nvt.GetValueUInt32(); } if (dsh.FindNameValParam(CDF_CELLS, nvt)) { cellCnt += nvt.GetValueUInt32(); } } } void CDFProbeSetInformation::GetGroupInformation(u_int32_t groupIdx, 
CDFProbeGroupInformation& info) { // WAS if (groupIdx < 0 || ...) BUT 'groupIdx < 0' // is always false because 'groupIdx' is unsigned. if (groupIdx >= groupCnt) return; DataSet* ds = dataGroup->DataSet(groupIdx); info.SetDataSet(ds); } affxparser/src/fusion/calvin_files/data/src/CDFProbeSetInformation.h0000644000175200017520000000655514516003651026620 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFProbeSetInformation_HEADER_ #define _CDFProbeSetInformation_HEADER_ /*! \file CDFProbeSetInformation.h This file defines the CDFProbeSetInformation class. */ #ifdef _MSC_VER #include #endif #include "calvin_files/data/src/CDFDataTypes.h" #include "calvin_files/data/src/DataGroup.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // namespace affymetrix_calvin_io { class CDFProbeGroupInformation; /*! Class that exposes the CDF probe set information */ class CDFProbeSetInformation { public: /*! Constructor */ CDFProbeSetInformation(); /*! Destructor */ ~CDFProbeSetInformation(); /*! Get the probe set name */ std::wstring GetName() const { if (dataGroup) return dataGroup->Header().GetName(); else return L""; } /*! 
Get the probe set direction */ affymetrix_calvin_io::DirectionType GetDirection() const { return (affymetrix_calvin_io::DirectionType)direction; } /*! Get the total number of lists (atoms) in the probe set */ u_int32_t GetNumLists() const { return listCnt; } /*! Get the number of groups (block) in the probe set */ u_int32_t GetNumGroups() const { return groupCnt; } /*! Get the total number of cells in the probe set */ u_int32_t GetNumCells() const { return cellCnt; } /*! Get the number of cells per list (atom) */ u_int8_t GetNumCellsPerList() const { return (u_int8_t)cellsPerList; } /*! Get the probe set number */ u_int32_t GetProbeSetNumber() const { return probeSetNumber; } /*! Get information for a probe set group by index */ void GetGroupInformation(u_int32_t groupIdx, CDFProbeGroupInformation & info); /*! Get the probe set type. */ CDFDataTypeIds GetProbeSetType() const { return (CDFDataTypeIds)type; } protected: /*! */ void SetDataGroup(DataGroup& dg); protected: /*! DataGroup */ DataGroup* dataGroup; /*! Probe set type - based on unitType of first DataSet u_int8_t*/ CDFDataTypeIds type; /*! Direction - based on first DataSet */ u_int8_t direction; /*! Total number of groups (blocks), = number of DataSets in the DataGroup */ u_int32_t groupCnt; /*! Total number of lists (atoms) in the probe set - sum of all DataSets */ u_int32_t listCnt; /*! Total number of cells in the probe set - sum of all DataSets */ u_int32_t cellCnt; /*! Probe set number - based on value in first DataSet, same as ProbeSetNumber in C++ */ u_int32_t probeSetNumber; /*! 
Cells per list (Cell per atom) - based on value in the first DataSet, same as NumCellsPerList in C++ */ u_int8_t cellsPerList; friend class CDFData; }; } #endif // _CDFProbeSetInformation_HEADER_ affxparser/src/fusion/calvin_files/data/src/CDFQCProbeInformation.cpp0000644000175200017520000000270114516003651026710 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CDFQCProbeInformation.h" // using namespace affymetrix_calvin_io; /* * Constructor */ CDFQCProbeInformation::CDFQCProbeInformation() { xCoord = 0; yCoord = 0; probeLength = 0; perfectMatchFlag = 0; backgroundProbeFlag = 0; } void CDFQCProbeInformation::CopyFromDataSet(DataSet* dataSet, int32_t row) { dataSet->GetData(row, XCoordCol, xCoord); dataSet->GetData(row, YCoordCol, yCoord); dataSet->GetData(row, ProbeLengthCol, probeLength); dataSet->GetData(row, PerfectMatchFlagCol, perfectMatchFlag); dataSet->GetData(row, BackgroundProbeFlagCol, backgroundProbeFlag); } affxparser/src/fusion/calvin_files/data/src/CDFQCProbeInformation.h0000644000175200017520000000453214516003651026361 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// 
// // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFQCProbeInformation_HEADER_ #define _CDFQCProbeInformation_HEADER_ /*! \file CDFQCProbeInformation.h This file defines the CDFQCProbeInformation class. */ #include "calvin_files/data/src/DataSet.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // namespace affymetrix_calvin_io { /*! Class that exposes the CDF QC probe information */ class CDFQCProbeInformation { public: /*! Constructor */ CDFQCProbeInformation(); /*! Get the x-coordinate of the probe */ u_int16_t GetX() const { return xCoord; } /*! Get the y-coordinate of the probe */ u_int16_t GetY() const { return yCoord; } /*! Get the probe length */ u_int8_t GetPLen() const { return probeLength; } /*! Is this a perfect match probe */ bool IsPerfectMatchProbe() const { return (perfectMatchFlag == 1 ? true : false); } /*! Is this a background probe */ bool IsBackgroundProbe() const { return (backgroundProbeFlag == 1 ? true : false); } protected: enum DataSetColumns { XCoordCol, YCoordCol, ProbeLengthCol, PerfectMatchFlagCol, BackgroundProbeFlagCol }; /*! */ void CopyFromDataSet(DataSet* ds, int32_t row); protected: /*! x-coordinate of the probe, m_X in C++ */ u_int16_t xCoord; /*! 
y-coordinate of the probe, m_Y in C++ */ u_int16_t yCoord; /*! Probe length */ u_int8_t probeLength; /*! Perfect match flag, 1 = match */ u_int8_t perfectMatchFlag; /*! Background probe flag, 1 = background */ u_int8_t backgroundProbeFlag; friend class CDFQCProbeSetInformation; }; } #endif // _CDFQCProbeInformation_HEADER_ affxparser/src/fusion/calvin_files/data/src/CDFQCProbeSetInformation.cpp0000644000175200017520000000401214516003651027361 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CDFQCProbeSetInformation.h" // #include "calvin_files/data/src/CDFData.h" #include "calvin_files/data/src/CDFQCProbeInformation.h" #include "calvin_files/data/src/DataSet.h" // using namespace affymetrix_calvin_io; CDFQCProbeSetInformation::CDFQCProbeSetInformation() { dataSet = 0; } CDFQCProbeSetInformation::~CDFQCProbeSetInformation() { if (dataSet) { dataSet->Close(); dataSet->Delete(); } } /* * Get the QC type - DataSet name */ std::wstring CDFQCProbeSetInformation::GetQCProbeSetType() const { return dataSet->Header().GetName(); } /* * Get the number of cells in the probe set - number of rows in the DataSet */ int32_t CDFQCProbeSetInformation::GetNumCells() const { if (dataSet == 0) return 0; return dataSet->Rows(); } void CDFQCProbeSetInformation::SetDataSet(DataSet* ds) { if (dataSet) { dataSet->Close(); dataSet->Delete(); } dataSet = ds; dataSet->Open(); } /* * Get information for the probe by index */ void CDFQCProbeSetInformation::GetProbeInformation(int32_t cell_index, CDFQCProbeInformation& info) { if (cell_index < 0 || cell_index >= dataSet->Rows()) return; info.CopyFromDataSet(dataSet, cell_index); } affxparser/src/fusion/calvin_files/data/src/CDFQCProbeSetInformation.h0000644000175200017520000000364714516003651027043 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFQCProbeSetInformation_HEADER_ #define _CDFQCProbeSetInformation_HEADER_ #ifdef _MSC_VER #include #endif /*! \file CDFQCProbeSetInformation.h This file defines the CDFQCProbeSetInformation class. */ #include "calvin_files/data/src/DataSet.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include // namespace affymetrix_calvin_io { class CDFQCProbeInformation; /*! Class that exposes the CDF QC probe set information */ class CDFQCProbeSetInformation { public: /*! Constructor */ CDFQCProbeSetInformation(); /*! Destructor */ ~CDFQCProbeSetInformation(); /*! Get the QC type - DataSet name */ std::wstring GetQCProbeSetType() const; /*! Get the number of cells in the probe set - number of rows in the DataSet */ int32_t GetNumCells() const; /*! Get the QC probe information */ void GetProbeInformation(int index, CDFQCProbeInformation& info); protected: void SetDataSet(DataSet* ds); protected: /*! DataSet pointer */ DataSet* dataSet; friend class CDFData; }; } #endif // _CDFQCProbeSetInformation_HEADER_ affxparser/src/fusion/calvin_files/data/src/CELData.cpp0000644000175200017520000005454514516003651024104 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CELData.h" // #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/data/src/GenericDataTypes.h" #include "calvin_files/exception/src/ExceptionBase.h" #include "calvin_files/utils/src/FileUtils.h" #include "calvin_files/utils/src/StringUtils.h" // #include // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_parameter; /* * Default constructor */ CelFileData::CelFileData() { dpInten = 0; dpStdev = 0; dpPixels = 0; Clear(); } /* * Constructor */ CelFileData::CelFileData(const std::string &filename) { dpInten = 0; dpStdev = 0; dpPixels = 0; Clear(); SetFilename(filename); genericData.Header().GetGenericDataHdr()->SetFileTypeId(INTENSITY_DATA_TYPE); DataGroupHeader dcHdr(CelDataGroupName); genericData.Header().AddDataGroupHdr(dcHdr); } /* * Destructor */ CelFileData::~CelFileData() { Clear(); } /* * Clear the object members. 
*/ void CelFileData::Clear() { genericData.Header().Clear(); setIntensityMetaData = false; setStdDevMetaData = false; setPixelMetaData = false; setOutlierMetaData = false; setMaskMetaData = false; CloseDataSets(); ResetMaskAndOutliers(); cachedRows = -1; cachedCols = -1; intensityColumnType = -1; activeChannel = CelDataGroupName; } void CelFileData::CloseDataSets() { if (dpInten) { dpInten->Delete(); dpInten = 0; } if (dpStdev) { dpStdev->Delete(); dpStdev = 0; } if (dpPixels) { dpPixels->Delete(); dpPixels = 0; } } void CelFileData::ResetMaskAndOutliers() { outlierPlaneRead = false; outliers.clear(); maskPlaneRead = false; masked.clear(); } void CelFileData::SetActiveChannel(const std::wstring &channel) { if(activeChannel != channel) { activeChannel = channel; CloseDataSets(); ResetMaskAndOutliers(); } } /* * Check if the file exists. */ bool CelFileData::Exists() { std::string filename = genericData.Header().GetFilename(); if (filename != "") { // Find the file stats. return FileUtils::Exists(filename.c_str()); } return false; } /* * Set file name */ void CelFileData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } /* * Get file name */ std::string CelFileData::GetFilename() const { return ((GenericData&)genericData).Header().GetFilename(); } void CelFileData::SetIntensityCount(int32_t ln) { DataSetHeader setHdr; setHdr.SetRowCnt(ln); setHdr.SetName(CelIntensityLabel); setHdr.AddFloatColumn(CelIntensityLabel); if(setIntensityMetaData) { UpdateDataSetRowCount(setHdr); } else { InsertDataSetHeader(setHdr); setIntensityMetaData = true; } } void CelFileData::SetStdDevCount(int32_t ln) { DataSetHeader setHdr; setHdr.SetRowCnt(ln); setHdr.SetName(CelStdDevLabel); setHdr.AddFloatColumn(CelStdDevLabel); if(setStdDevMetaData) { UpdateDataSetRowCount(setHdr); } else { InsertDataSetHeader(setHdr); setStdDevMetaData = true; } } void CelFileData::SetPixelCount(int32_t ln) { DataSetHeader setHdr; setHdr.SetRowCnt(ln); setHdr.SetName(CelPixelLabel); 
setHdr.AddShortColumn(CelPixelLabel); if(setPixelMetaData) { UpdateDataSetRowCount(setHdr); } else { InsertDataSetHeader(setHdr); setPixelMetaData = true; } } void CelFileData::SetOutlierCount(int32_t ln) { DataSetHeader setHdr; setHdr.SetRowCnt(ln); setHdr.SetName(CelOutlierLabel); setHdr.AddShortColumn(L"X"); setHdr.AddShortColumn(L"Y"); if(setOutlierMetaData) { UpdateDataSetRowCount(setHdr); } else { InsertDataSetHeader(setHdr); setOutlierMetaData = true; } } void CelFileData::SetMaskCount(int32_t ln) { DataSetHeader setHdr; setHdr.SetRowCnt(ln); setHdr.SetName(CelMaskLabel); setHdr.AddShortColumn(L"X"); setHdr.AddShortColumn(L"Y"); if(setMaskMetaData) { UpdateDataSetRowCount(setHdr); } else { InsertDataSetHeader(setHdr); setMaskMetaData = true; } } void CelFileData::UpdateDataSetRowCount(const DataSetHeader &hdr) { DataGroupHeader* grpHdr = &genericData.Header().GetDataGroup(0); bool found = false; int sz = grpHdr->GetDataSetCnt(); for(int i = 0; i < sz; i++) { DataSetHeader* dpHdr = &grpHdr->GetDataSet(i); if(dpHdr->GetName() == hdr.GetName()) { dpHdr->SetRowCnt(hdr.GetRowCnt()); found = true; break; } } if(!found) { InsertDataSetHeader(hdr); } } void CelFileData::InsertDataSetHeader(const DataSetHeader &hdr) { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); dcHdr->AddDataSetHdr(hdr); } /* * Return the number of cells on the array. 
*/ int32_t CelFileData::GetNumCells() { int32_t rows = 0; try { PrepareIntensityPlane(); if (dpInten) { rows = dpInten->Rows(); } } catch(CalvinException&) { } return rows; } /* * Prepare to read intensity data */ void CelFileData::PrepareIntensityPlane() { if (dpInten == 0) { dpInten = genericData.DataSet(activeChannel, CelIntensityLabel); if (dpInten) { dpInten->Open(); intensityColumnType = dpInten->Header().GetColumnInfo(0).GetColumnType(); } } } /* * Prepare to read the standard deviation data */ void CelFileData::PrepareStdevPlane() { if (dpStdev == 0) { dpStdev = genericData.DataSet(activeChannel, CelStdDevLabel); if (dpStdev) { dpStdev->Open(); } } } /* * Prepare to read the number of pixel data */ void CelFileData::PrepareNumPixelPlane() { if (dpPixels == 0) { dpPixels = genericData.DataSet(activeChannel, CelPixelLabel); if (dpPixels) { dpPixels->Open(); } } } /* * Prepare to read the outlier data */ void CelFileData::PrepareOutlierPlane() { if (outlierPlaneRead) { return; } outlierPlaneRead = true; // Read attempted try { DataSet* dpOutlier = genericData.DataSet(activeChannel, CelOutlierLabel); if (dpOutlier) { if (dpOutlier->Open()) { int32_t rows = dpOutlier->Rows(); for (int32_t row = 0; row < rows; ++row) { int16_t x = 0, y = 0; dpOutlier->GetData(row, 0, x); dpOutlier->GetData(row, 1, y); XYCoord xy(x,y); outliers.insert(xy); } } dpOutlier->Delete(); dpOutlier = 0; } } catch(affymetrix_calvin_exceptions::DataSetNotFoundException) { } } /* * Prepare to read the mask data */ void CelFileData::PrepareMaskedPlane() { if (maskPlaneRead) { return; } maskPlaneRead = true; // Read attempted try { DataSet* dpMasked = genericData.DataSet(activeChannel, CelMaskLabel); if (dpMasked) { if (dpMasked->Open()) { int32_t rows = dpMasked->Rows(); for (int32_t row = 0; row < rows; ++row) { int16_t x = 0, y = 0; dpMasked->GetData(row, 0, x); dpMasked->GetData(row, 1, y); XYCoord xy(x,y); masked.insert(xy); } } dpMasked->Delete(); dpMasked = 0; } } 
catch(affymetrix_calvin_exceptions::DataSetNotFoundException) { } } /* * Prepare to read all data */ void CelFileData::PrepareAllPlanes() { PrepareIntensityPlane(); PrepareStdevPlane(); PrepareNumPixelPlane(); PrepareOutlierPlane(); PrepareMaskedPlane(); } /* * Get the cell intensity, standard deviation, number of pixels, outlier flag and mask flag. */ void CelFileData::GetData(int32_t cellIdx, float& intensity, float& stdev, int16_t& numPixels, bool& outlier, bool& masked) { PrepareAllPlanes(); // Index checking is done in the Generic layer if (dpInten) { // if out-of-bounds, forward the exception if (intensityColumnType == FloatColType) dpInten->GetData(cellIdx, 0, intensity); else // try u_int16_t or throw { u_int16_t int16Inten; dpInten->GetData(cellIdx, 0, int16Inten); intensity = (float)int16Inten; } } if (dpStdev) { try { stdev = 0.0f; if (dpStdev) { if (cellIdx < dpStdev->Rows()) dpStdev->GetData(cellIdx, 0, stdev); } } catch(CalvinException&) { stdev = 0.0f; } } if (dpPixels) { try { numPixels = 0; if (dpPixels) { if (cellIdx < dpPixels->Rows()) dpPixels->GetData(cellIdx, 0, numPixels); } } catch(CalvinException&) { numPixels = 0; } } outlier = IsOutlier(cellIdx); masked = IsMasked(cellIdx); } /* * Get the intensity as used in FusionCELData */ float CelFileData::GetIntensity(int index) { // allocate a vector, fill it with one item FloatVector v; this->GetIntensities(index, 1, v); // and return that one item. return v.at(0); } /* * Get the intensities for a range of cell indexes. 
*/ bool CelFileData::GetIntensities(int32_t cellIdxStart, int32_t count, FloatVector& values) { PrepareIntensityPlane(); if (dpInten && dpInten->IsOpen()) { if (intensityColumnType == FloatColType) dpInten->GetData(0, cellIdxStart, count, values); else // try u_int16_t or throw { Uint16Vector uint16Vector; dpInten->GetData(0, cellIdxStart, count, uint16Vector); values.resize(uint16Vector.size()); for (u_int32_t i = 0; i < uint16Vector.size(); ++i) values[i] = (float)uint16Vector[i]; } return (count == values.size()); } return false; } /* * Get the standard deviations for a range of cell indexes. */ bool CelFileData::GetStdev(int32_t cellIdxStart, int32_t count, FloatVector& values) { PrepareStdevPlane(); if (dpStdev && dpStdev->IsOpen()) { dpStdev->GetData(0, cellIdxStart, count, values); return (count == values.size()); } return false; } /* * Get the number of pixels for a range of cell indexes. */ bool CelFileData::GetNumPixels(int32_t cellIdxStart, int32_t count, Int16Vector& values) { PrepareNumPixelPlane(); if (dpPixels && dpPixels->IsOpen()) { dpPixels->GetData(0, cellIdxStart, count, values); return (count == values.size()); } return false; } /* * Get the outlier flags for a range of cell indexes. */ bool CelFileData::GetOutliers(int32_t cellIdxStart, int32_t count, BoolVector& values) { PrepareOutlierPlane(); if (outliers.empty()) return false; int32_t lastCell = cellIdxStart+count; if (lastCell > GetNumCells()) lastCell = GetNumCells(); for (int32_t cellIdx = cellIdxStart; cellIdx < lastCell; ++cellIdx) values.push_back(IsOutlier(cellIdx)); return true; } /* * Get the mask flags for a range of cell indexes. 
*/ bool CelFileData::GetMasked(int32_t cellIdxStart, int32_t count, BoolVector& values) { PrepareMaskedPlane(); if (masked.empty()) return false; int32_t lastCell = cellIdxStart+count; if (lastCell > GetNumCells()) lastCell = GetNumCells(); for (int32_t cellIdx = cellIdxStart; cellIdx < lastCell; ++cellIdx) values.push_back(IsMasked(cellIdx)); return true; } /* * Get the coordinates of all outlier cells (i.e. outlier flag is true). */ void CelFileData::GetOutlierCoords(XYCoordVector& coords) { PrepareOutlierPlane(); for (std::set::iterator ii = outliers.begin(); ii != outliers.end(); ++ii) { XYCoord xy(ii->xCoord, ii->yCoord); coords.push_back(xy); } } /* * Get the coordinates of all masked cells (i.e. mask flag is true). */ void CelFileData::GetMaskedCoords(XYCoordVector& coords) { PrepareMaskedPlane(); for (std::set::iterator ii = masked.begin(); ii != masked.end(); ++ii) { XYCoord xy(ii->xCoord, ii->yCoord); coords.push_back(xy); } } /* * Indicates whether there are standard deviation values. */ bool CelFileData::HasStdev() { DataSetHeader* dph = FindDataSetHeader(CelStdDevLabel); if (dph) return (dph->GetRowCnt() > 0); return false; } /* * Indicates whether there are number of pixel values. 
*/ bool CelFileData::HasNumPixels() { DataSetHeader* dph = FindDataSetHeader(CelPixelLabel); if (dph) return (dph->GetRowCnt() > 0); return false; } /* * Set the version of the CEL file */ void CelFileData::SetVersion(u_int8_t value) { ParameterNameValueType paramType; paramType.SetName(FILE_VERSION_PARAM_NAME); paramType.SetValueUInt8(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } /* * Get the version of the CEL file */ u_int8_t CelFileData::GetVersion() { u_int8_t result = 0; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(FILE_VERSION_PARAM_NAME, paramType)) { result = paramType.GetValueUInt8(); } return result; } /* * Set the array type */ void CelFileData::SetArrayType(const std::wstring& value) { SetWStringToGenericHdr(ARRAY_TYPE_PARAM_NAME, value, ARRAY_TYPE_MAX_LEN); } /* * Get the array type */ std::wstring CelFileData::GetArrayType() { return GetWStringFromGenericHdr(ARRAY_TYPE_PARAM_NAME); } /* * Set the master file */ void CelFileData::SetMasterFileName(const std::wstring& value) { SetWStringToGenericHdr(MASTER_FILE_PARAM_NAME, value); } /* * Get the master file */ std::wstring CelFileData::GetMasterFileName() { return GetWStringFromGenericHdr(MASTER_FILE_PARAM_NAME); } /* * Set the library package */ void CelFileData::SetLibraryPackageName(const std::wstring& value) { SetWStringToGenericHdr(LIBRARY_PACKAGE_PARAM_NAME, value, ARRAY_TYPE_MAX_LEN); } /* * Get the library package */ std::wstring CelFileData::GetLibraryPackageName() { return GetWStringFromGenericHdr(LIBRARY_PACKAGE_PARAM_NAME); } /* * Set name of the algorithm used to generate the results. */ void CelFileData::SetAlgorithmName(const std::wstring& value) { SetWStringToGenericHdr(ALGORITHM_NAME_PARAM_NAME, value); } /* * Get the name of the algorithm used to generate the results. 
*/ std::wstring CelFileData::GetAlgorithmName() { return GetWStringFromGenericHdr(ALGORITHM_NAME_PARAM_NAME); } /* * Set name of the algorithm used to generate the results. */ void CelFileData::SetAlgorithmVersion(const std::wstring& value) { SetWStringToGenericHdr(ALG_VERSION_PARAM_NAME, value); } /* * Get the name of the algorithm used to generate the results. */ std::wstring CelFileData::GetAlgorithmVersion() { return GetWStringFromGenericHdr(ALG_VERSION_PARAM_NAME); } /* * Set the number of rows of cells */ void CelFileData::SetRows(int32_t value) { SetInt32ToGenericHdrParameterList(CEL_ROWS_PARAM_NAME, value); cachedRows = value; } /* * Get the number of rows of cells. */ int32_t CelFileData::GetRows() { if (cachedRows == -1) cachedRows = GetInt32FromGenericHdrParameterList(CEL_ROWS_PARAM_NAME); return cachedRows; } /* * Set the number of cols of cells */ void CelFileData::SetCols(int32_t value) { SetInt32ToGenericHdrParameterList(CEL_COLS_PARAM_NAME, value); cachedCols = value; } bool CelFileData::IsMultiColor() { return (GetChannels().size() > 1); } WStringVector CelFileData::GetChannels() { // look for tag - "affymetrix-channel-wavelength" // if not found then set the channel to "Default Group" // else parse and return. WStringVector result; GenericDataHeader* gdh = this->GetFileHeader()->GetGenericDataHdr()->FindParent(MULTI_SCAN_ACQUISITION_DATA_TYPE); if (gdh == NULL) { result.push_back(CelDataGroupName); } else { // found the right header, now look for the parameter ParameterNameValueType nvt; if (gdh->FindNameValParam(L"affymetrix-channel-wavelength", nvt)) { std::wstring channels = nvt.ToString(); std::wstring::size_type idx = channels.find_first_of(L";", 0); while (idx != std::wstring::npos) { result.push_back(channels.substr(0, idx)); channels = channels.substr(idx+1); idx = channels.find_first_of(L";", 0); } result.push_back(channels); } // Only one channel is found, this is the case when MC scanner operates in single channel mode. 
if (result.size() == 1) { result.clear(); result.push_back(CelDataGroupName); } } return result; } /* * Get the number of cols of cells */ int32_t CelFileData::GetCols() { if (cachedCols == -1) cachedCols = GetInt32FromGenericHdrParameterList(CEL_COLS_PARAM_NAME); return cachedCols; } /* TODO consider throwing an exception */ /* * Read an int32_t value from the GenericDataHeader parameter list. */ int32_t CelFileData::GetInt32FromGenericHdrParameterList(const std::wstring& name) { int32_t result = 0; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueInt32(); } return result; } /* * Set a int32_t value into the GenericDataHeader parameter list. */ void CelFileData::SetInt32ToGenericHdrParameterList(const std::wstring& name, int32_t value) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueInt32(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } /* * Read an wstring value from the GenericDataHeader parameter list. */ std::wstring CelFileData::GetWStringFromGenericHdr(const std::wstring& name) { std::wstring result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueText(); } return result; } /* * Set a wstring value into the GenericDataHeader parameter list. */ void CelFileData::SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueText(value, reserve); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } /* * Determines if the cell at a given index is an outlier. 
* Assumes that the outlier DataSet has been read */ bool CelFileData::IsOutlier(int32_t cellIdx) { int16_t x, y; ComputeXY(cellIdx, x, y); XYCoord xy(x,y); return (outliers.find(xy) != outliers.end()); } /* * Determines if the cell at a given index is masked. * Assumes that the masked DataSet has been read */ bool CelFileData::IsMasked(int32_t cellIdx) { int16_t x, y; ComputeXY(cellIdx, x, y); XYCoord xy(x,y); return (masked.find(xy) != masked.end()); } /* * Determine the x-y coordinate given a cell index. */ void CelFileData::ComputeXY(int32_t cellIdx, int16_t& x, int16_t& y) { y = (int16_t)(cellIdx/GetCols()); x = (int16_t)(cellIdx - GetCols()*y); } /* */ std::wstring CelFileData::GetDatHeader() { std::wstring datHeader; //GenDataHdrVectorIt begin, end; GenericDataHeader* gdh = this->GetFileHeader()->GetGenericDataHdr()->FindParent(MULTI_SCAN_ACQUISITION_DATA_TYPE); if (gdh) { // found the right header, now look for the parameter ParameterNameValueType nvt; if (gdh->FindNameValParam(DAT_HEADER_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::TextType) datHeader = nvt.GetValueText(); } else if (gdh->FindNameValParam(PARTIAL_DAT_HEADER_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::TextType) { std::wstring partialDatHeader = nvt.GetValueText(); u_int16_t min = 0; u_int16_t max = 0; // Find the max and min parameters and append to the string. if (gdh->FindNameValParam(MAX_PIXEL_INTENSITY_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::UInt16Type) max = nvt.GetValueUInt16(); } if (gdh->FindNameValParam(MIN_PIXEL_INTENSITY_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::UInt16Type) min = nvt.GetValueUInt16(); } wchar_t buf[30]=L""; FormatString2(buf, 30, L"[%d..%d]", min, max); datHeader = buf; datHeader += partialDatHeader; } } } return datHeader; } /* * Find a DataSetHeader by name. 
*/ DataSetHeader* CelFileData::FindDataSetHeader(const std::wstring& name) { DataGroupHeader* grpHdr = genericData.FindDataGroupHeader(activeChannel); if (grpHdr) { DataSetHeader* setHdr = genericData.FindDataSetHeader(grpHdr, name); if (setHdr) { return setHdr; } } return 0; } /* * Return the algorithm parameters. The algorithm parameter prefix is removed from the name. */ void CelFileData::GetAlgorithmParameters(ParameterNameValueTypeVector& algParams) { // Get all parameters that start with the algorithm parameter prefix GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->GetNameValParamsBeginsWith(ALGORITHM_PARAM_NAME_PREFIX, algParams); // Remove the prefix from the name int32_t len = (int32_t)wcslen(ALGORITHM_PARAM_NAME_PREFIX); for (ParameterNameValueTypeIt ii = algParams.begin(); ii != algParams.end(); ++ii) { std::wstring s = ii->GetName(); s.erase(0, len); ii->SetName(s); } } /* * Add an algorithm parameter. The algorithm parameter prefix will be added by the method. */ void CelFileData::AddAlgorithmParameter(ParameterNameValueType& nvt) { std::wstring s = nvt.GetName(); s.insert(0, ALGORITHM_PARAM_NAME_PREFIX); nvt.SetName(s); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(nvt); } /* * Return an algorithm parameter given a name. */ bool CelFileData::FindAlgorithmParameter(const std::wstring& name, ParameterNameValueType& param) { std::wstring paramName = name; paramName.insert(0, ALGORITHM_PARAM_NAME_PREFIX); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(paramName, param)) { int32_t len = (int32_t)wcslen(ALGORITHM_PARAM_NAME_PREFIX); std::wstring s = param.GetName(); s.erase(0, len); param.SetName(s); return true; } return false; } /* * Returns the list of parameters associated with a data set. 
*/ ParameterNameValueTypeList CelFileData::GetDataSetParameters(const std::wstring &setName) { ParameterNameValueTypeList params; DataSet *set = genericData.DataSet(activeChannel, setName); if (set != NULL) { ParameterNameValueTypeConstIt b; ParameterNameValueTypeConstIt e; ParameterNameValueTypeConstIt it; set->Header().GetNameValIterators(b, e); for (it=b; it!=e; it++) { params.push_back(*it); } set->Delete(); set = NULL; } return params; } affxparser/src/fusion/calvin_files/data/src/CELData.h0000644000175200017520000003250514516003651023541 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CelFileData_HEADER_ #define _CelFileData_HEADER_ /*! \file CELData.h This file provides methods to access CEL data. */ #include "calvin_files/data/src/GenericData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/Coords.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! Data Group name */ #define CelDataGroupName std::wstring(L"Default Group") /*! 
Name of the intensity data set */ #define CelIntensityLabel std::wstring(L"Intensity") /*! Name of the standard deviation data set */ #define CelStdDevLabel std::wstring(L"StdDev") /*! Name of the pixel data set */ #define CelPixelLabel std::wstring(L"Pixel") /*! Name of the outlier data set */ #define CelOutlierLabel std::wstring(L"Outlier") /*! Name of the mask data set */ #define CelMaskLabel std::wstring(L"Mask") /*! Cel file version */ #define CurrentCelFileVersion u_int8_t(1) /*! This is the container class for CEL data. */ class CelFileData { public: /*! Default Constructor */ CelFileData(); /*! Constructor * @param filename Name of the cel file. */ CelFileData(const std::string &filename); /*! Destructor */ ~CelFileData(); private: /*! Generic layer object */ GenericData genericData; bool setIntensityMetaData; bool setStdDevMetaData; bool setPixelMetaData; bool setOutlierMetaData; bool setMaskMetaData; int32_t intensityColumnType; /*! current wavelength which is the datagroup in this file */ std::wstring activeChannel; // DataSet cache - initialized on first use and Delete in destructor /*! Intensity DataSet */ DataSet* dpInten; /*! Stdev DataSet */ DataSet* dpStdev; /*! NumPixels DataSet */ DataSet* dpPixels; /*! Indicates whether an attempt to read the outlier data set has been made. */ bool outlierPlaneRead; /*! A set of outlier cell coordinates. */ std::set outliers; /*! Indicates whether an attempt to read the mask data set has been made. */ bool maskPlaneRead; /*! A set of masked cell coordinates. */ std::set masked; /*! keep rows from being read from the header all the time */ int32_t cachedRows; /*! keep cols from being read from the header all the time */ int32_t cachedCols; public: /*! Clear the object members */ void Clear(); /*! Check if the file exists */ bool Exists(); /*! Sets the active channel for a multi-channel CEL file. Default is single channel. */ void SetActiveChannel(const std::wstring &channel); /*! 
Set the file name * @param p file name */ void SetFilename(const std::string &p); /*! Get the file name * @return file name */ std::string GetFilename() const; /*! */ void SetIntensityCount(int32_t ln); /*! */ void SetStdDevCount(int32_t ln); /*! */ void SetPixelCount(int32_t ln); /*! */ void SetOutlierCount(int32_t ln); /*! */ void SetMaskCount(int32_t ln); /*! */ FileHeader* GetFileHeader() { return &genericData.Header(); } /*! Get the version of the CEL file * @return CEL file version */ u_int8_t GetVersion(); /*! Set the array type * @param value array type name */ void SetArrayType(const std::wstring& value); /*! Get the array type * @return array type name */ std::wstring GetArrayType(); /*! Set the master file * @param value master file name */ void SetMasterFileName(const std::wstring& value); /*! Get the master file * @return master file name */ std::wstring GetMasterFileName(); /*! Set the library package * @param value library package name */ void SetLibraryPackageName(const std::wstring& value); /*! Get the library package * @return library package name */ std::wstring GetLibraryPackageName(); /*! Set name of the algorithm used to generate the results. * @param value algorithm name */ void SetAlgorithmName(const std::wstring& value); /*! Get the name of the algorithm used to generate the results. * @return Algorithm name */ std::wstring GetAlgorithmName(); /*! Set version of the algorithm used to generate the results. * @param value algorithm name */ void SetAlgorithmVersion(const std::wstring& value); /* * Get the version of the algorithm used to generate the results. */ std::wstring GetAlgorithmVersion(); /*! Set the number of rows of cells on the array. * @param value Number of rows of cells. */ void SetRows(int32_t value); /*! Get the number of rows of cells on the array. * @return Number of rows of cells. */ int32_t GetRows(); /*! Set the number of columns of cells on the array. * @param value Number of columns of cells. 
*/ void SetCols(int32_t value); /*! Get the number of columns of cells on the array. * @return Number of columns of cells. */ int32_t GetCols(); /*! Return the number of cells on the array. * This is the number of the intensity data elements and is == GetRows() * GetCols() * Stdev, NumPixels, Outlier and Masked data is optional, but if present they will have GetNumCells elements. * @return Number of cells in the array. */ int32_t GetNumCells(); /*! Return the algorithm parameters. The algorithm parameter prefix is removed from the name. * @param algParams Vector with algorithm parameters. */ void GetAlgorithmParameters(ParameterNameValueTypeVector& algParams); /*! Return an algorithm parameter given a name. * @param name Name of the parameter to find. * @param param The found parameter. * @return True if the parameter was found. */ bool FindAlgorithmParameter(const std::wstring& name, ParameterNameValueType& param); /*! Add an algorithm parameter. The algorithm parameter prefix will be added by the method. * @param nvt Algorithm parameter to add to the list. */ void AddAlgorithmParameter(ParameterNameValueType& nvt); /*! Get the cell intensity, standard deviation, number of pixels, outlier flag and mask flag. * @param cellIdx Index of the cell. * @param intensity The intensity of the cell. * @param stdev The standard deviation of the cell. * @param numPixels The number of pixels in the cell. * @param outlier The outlier flag of the cell. * @param masked The masked flag of the cell. */ void GetData(int32_t cellIdx, float& intensity, float& stdev, int16_t& numPixels, bool& outlier, bool& masked); /*! Indicates whether there are standard deviation values. * @return True if there are standard deviation values. */ bool HasStdev(); /*! Indicates whether there are number of pixel values. * @return True if there are number of pixels values. */ bool HasNumPixels(); /*! Get the intensity for a cell index. * @param cell index of the intensity to retrieve. 
* @return float of a particular intensity value. */ float GetIntensity(int index); /*! Get the intensities for a range of cell indexes. * @param cellIdxStart Cell index of the first intensity to retrieve. * @param count Number of intensities to retrieve. * @param values Retrieved cell intensities. * @return True if all intensities were retrieved. */ bool GetIntensities(int32_t cellIdxStart, int32_t count, FloatVector& values); /*! Get the standard deviations for a range of cell indexes. * @param cellIdxStart Cell index of the first standard deviation to retrieve. * @param count Number of cell standard deviations to retrieve. * @param values Retrieved cell standard deviations. * @return True if all standard deviations were retrieved. */ bool GetStdev(int32_t cellIdxStart, int32_t count, FloatVector& values); /*! Get the number of pixels for a range of cell indexes. * @param cellIdxStart Cell index of the first pixel count to retrieve. * @param count Number of cell pixels to retrieve. * @param values Retrieved cell pixel counts * @return True if all pixel count (number of pixels) were retrieved. */ bool GetNumPixels(int32_t cellIdxStart, int32_t count, Int16Vector& values); /*! Get the outlier flags for a range of cell indexes. * @param cellIdxStart Cell index of the first outlier flag to retrieve. * @param count Number of cell outlier flags to retrieve. * @param values Retrieved cell outlier flags * @return True if all outlier flags were retrieved. */ bool GetOutliers(int32_t cellIdxStart, int32_t count, BoolVector& values); /*! Get the mask flags for a range of cell indexes. * @param cellIdxStart Cell index of the first mask flag to retrieve. * @param count Number of cell mask flags to retrieve. * @param values Retrieved cell mask flags * @return True if all mask flags were retrieved. */ bool GetMasked(int32_t cellIdxStart, int32_t count, BoolVector& values); /*! Get the coordinates of all outlier cells (i.e. outlier flag is true). * @param coords Cell coordinates. 
*/ void GetOutlierCoords(XYCoordVector& coords); /*! Get the coordinates of all masked cells (i.e. mask flag is true). * @param coords Cell coordinates. */ void GetMaskedCoords(XYCoordVector& coords); /*! Return a reference to the generic layer object * @return Generic layer object */ GenericData& GetGenericData() { return genericData; } /*! Return a wstring for DatHeader * @return DatHeader */ std::wstring GetDatHeader(); /*! Determine the x-y coordinate given a cell index. * @param cellIdx Cell index * @param x Cell x-coordinate * @param y Cell y-coordinate */ void ComputeXY(int32_t cellIdx, int16_t& x, int16_t& y); /*! Returns the list of parameters associated with a data set. * @param setName The data set name * @return The list of parameters */ ParameterNameValueTypeList GetDataSetParameters(const std::wstring &setName); /*! Is this a multi-color CEL file? * @return True if it is multi-color */ bool IsMultiColor(); /*! Returns a list of the channel (ie data group) names * @return list of channel names */ WStringVector GetChannels(); protected: /* Close the data set pointers. Used when switching channels. */ void CloseDataSets(); /* Clear the mask and outlier lists and reset to unread state. Used when switching channels. */ void ResetMaskAndOutliers(); private: /*! */ void SetFileCreationTime(const std::wstring &f); /*! */ std::wstring GetFileCreationTime() const; void InsertDataSetHeader(const DataSetHeader &hdr); void UpdateDataSetRowCount(const DataSetHeader &hdr); /*! Set the version of the CEL file. Used by the reader. * @param value CEL file version */ void SetVersion(u_int8_t value); /*! Prepare to read intensity data */ void PrepareIntensityPlane(); /*! Prepare to read the standard deviation data */ void PrepareStdevPlane(); /*! Prepare to read the number of pixel data */ void PrepareNumPixelPlane(); /*! Prepare to read the outlier data */ void PrepareOutlierPlane(); /*! Prepare to read the mask data */ void PrepareMaskedPlane(); /*! 
Prepare to read all data */ void PrepareAllPlanes(); /*! Read an int32_t value from the GenericDataHeader parameter list. * @param name Name of the parameter to read * @return int32_t value of the named parameter */ int32_t GetInt32FromGenericHdrParameterList(const std::wstring& name); /*! Set a int32_t value into the GenericDataHeader parameter list. * @param name of the parameter to set * @param value Int32_t value to set */ void SetInt32ToGenericHdrParameterList(const std::wstring& name, int32_t value); /*! Read an wstring value from the GenericDataHeader parameter list. * @param name Name of the parameter to read * @return wstring value of the named parameter */ std::wstring GetWStringFromGenericHdr(const std::wstring& name); /*! Set a wstring value into the GenericDataHeader parameter list. * @param name of the parameter to set * @param value wstring value to set * @param reserve The total number of characters to reserve for the value. -1 indicates not to reserve any extra space. */ void SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve=-1); /*! Check if the cell is an outlier (outlier flag is true) * @param cellIdx Cell index * @return True if the cell outlier flag is true */ bool IsOutlier(int32_t cellIdx); /*! Check if the cell is masked (mask flag is true) * @param cellIdx cell index * @return True if the cell mask flag is true */ bool IsMasked(int32_t cellIdx); /*! Find a DataSetHeader by name. 
* @param name DataSetHeader name * @return Pointer to a DataSetHeader with name parameter, otherwise 0 */ DataSetHeader* FindDataSetHeader(const std::wstring& name); friend class CelFileReader; friend class CelFileWriter; friend class MultiChannelCelFileWriter; }; } #endif // _CelFileData_HEADER_ affxparser/src/fusion/calvin_files/data/src/CHPBackgroundZone.cpp0000644000175200017520000000411014516003651026134 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPBackgroundZone.h" // using namespace affymetrix_calvin_io; CHPBackgroundZone::CHPBackgroundZone() { Clear(); } CHPBackgroundZone::CHPBackgroundZone(float x, float y, float bg, float smooth) { centerX = x; centerY = y; background = bg; smoothFactor = smooth; } CHPBackgroundZone::~CHPBackgroundZone() {} void CHPBackgroundZone::Clear() { centerX = 0.0; centerY = 0.0; background = 0.0; smoothFactor = 0.0; } CHPBackgroundZone CHPBackgroundZone::operator=(CHPBackgroundZone zn) { centerX = zn.GetCenterX(); centerY = zn.GetCenterY(); background = zn.GetBackground(); smoothFactor = zn.GetSmoothFactor(); return *this; } float CHPBackgroundZone::GetCenterX() const { return centerX; } float CHPBackgroundZone::GetCenterY() const { return centerY; } float CHPBackgroundZone::GetBackground() const { return background; } float CHPBackgroundZone::GetSmoothFactor() const { return smoothFactor; } void CHPBackgroundZone::SetCenterX(float p) { centerX = p; } void CHPBackgroundZone::SetCenterY(float p) { centerY = p; } void CHPBackgroundZone::SetBackground(float p) { background = p; } void CHPBackgroundZone::SetSmoothFactor(float p) { smoothFactor = p; } affxparser/src/fusion/calvin_files/data/src/CHPBackgroundZone.h0000644000175200017520000000464014516003651025611 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPBackgroundZone_HEADER_ #define _CHPBackgroundZone_HEADER_ #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/Coords.h" // #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class stores a zone's background value */ class CHPBackgroundZone { private: /*! The X coordinate of the center of the zone. */ float centerX; /*! The Y coordinate of the center of the zone. */ float centerY; /*! The zone's background value */ float background; /*! The smoothing factor used to calculate the zone backgrounds */ float smoothFactor; public: CHPBackgroundZone(); CHPBackgroundZone(float x, float y, float bg, float smooth); ~CHPBackgroundZone(); void Clear(); /*! Assignment operator * @param zn The zone to copy * @return The new zone object */ CHPBackgroundZone operator=(CHPBackgroundZone zn); float GetCenterX() const; float GetCenterY() const; float GetBackground() const; float GetSmoothFactor() const; void SetCenterX(float p); void SetCenterY(float p); void SetBackground(float p); void SetSmoothFactor(float p); }; /*! An STL list of zones */ typedef std::vector CHPBackgroundZoneVector; /*! 
iterator of CHPBackgroundZones */ typedef std::vector::iterator CHPBackgroundZoneVectorIt; } #endif // _CHPBackgroundZone_HEADER_ affxparser/src/fusion/calvin_files/data/src/CHPData.cpp0000644000175200017520000005331714516003651024107 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPData.h" // #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/utils/src/StringUtils.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; // Constant column names. 
#define CallColName std::wstring( L"Call") #define ScoreColName std::wstring( L"Score") #define BackgroundColName std::wstring( L"Background") #define ConfidenceColName std::wstring( L"Confidence") #define RAS1ColName std::wstring( L"RAS1") #define RAS2ColName std::wstring( L"RAS2") #define AAColName std::wstring( L"AA Call p-value") #define ABColName std::wstring( L"AB Call p-value") #define BBColName std::wstring( L"BB Call p-value") #define NoCallColName std::wstring( L"No Call p-value") #define ProbeSetNameColName std::wstring( L"Probe Set Name") #define DetectionColName std::wstring( L"Detection") #define DetectionPValueColName std::wstring( L"Detection p-value") #define SignalColName std::wstring( L"Signal") #define NumberPairsColName std::wstring( L"Number of Pairs") #define NumberPairsUsedColName std::wstring( L"Number of Pairs Used") #define ChangeColName std::wstring( L"Change") #define ChangePValueColName std::wstring( L"Change p-value") #define SignalLogRatioColName std::wstring( L"Signal Log Ratio") #define SignalLogRatioLowColName std::wstring( L"Signal Log Ratio Low") #define SignalLogRatioHighColName std::wstring( L"Signal Log Ratio High") #define CommonPairsColName std::wstring( L"Common Pairs") #define CenterXColName std::wstring( L"Center X") #define CenterYColName std::wstring( L"Center Y") #define SmoothFactorColName std::wstring( L"Smooth Factor") #define PositionColName std::wstring( L"Position") #define ReasonColName std::wstring( L"Reason") #define ForceCallColName std::wstring( L"Force Call") #define OriginalCallColName std::wstring( L"Original Call") CHPData::CHPData() { entriesExp = 0; entriesGeno = 0; entriesUniv = 0; entriesReseq = 0; bgZones = 0; forceSet = 0; origSet = 0; wideProbeSetNames = false; Clear(); } CHPData::CHPData(const std::string& filename, const std::string &assayType) { entriesExp = 0; entriesGeno = 0; entriesUniv = 0; entriesReseq = 0; bgZones = 0; forceSet = 0; origSet = 0; wideProbeSetNames = false; Clear(); 
SetFilename(filename); std::wstring groupName; if (assayType == CHP_EXPRESSION_ASSAY_TYPE) groupName = CHP_EXPR_GROUP; else if (assayType == CHP_RESEQUENCING_ASSAY_TYPE) groupName = CHP_RESEQ_GROUP; else if (assayType == CHP_GENOTYPING_ASSAY_TYPE) groupName = CHP_GENO_GROUP; else if (assayType == CHP_UNIVERSAL_ASSAY_TYPE) groupName = CHP_UNIV_GROUP; else return; DataGroupHeader dcHdr(groupName); genericData.Header().AddDataGroupHdr(dcHdr); genericData.Header().GetGenericDataHdr()->SetFileTypeId(assayType); DataGroupHeader dcHdrBg(CHP_BG_ZONE_GROUP); genericData.Header().AddDataGroupHdr(dcHdrBg); // Now add the force and orig sets for reseq designs. if (assayType == CHP_RESEQUENCING_ASSAY_TYPE) { DataGroupHeader dcHdrForce(CHP_RESEQ_FORCE_CALL_GROUP); genericData.Header().AddDataGroupHdr(dcHdrForce); DataGroupHeader dcHdrOrig(CHP_RESEQ_ORIG_CALL_GROUP); genericData.Header().AddDataGroupHdr(dcHdrOrig); } } void CHPData::GetEntry(int32_t row, CHPGenotypeEntry& e) { PrepareGenoEntryDataSet(); if (entriesGeno && entriesGeno->IsOpen()) { std::string probeSetName; if (wideProbeSetNames == false) entriesGeno->GetData(row, 0, probeSetName); else { std::wstring wprobeSetName; entriesGeno->GetData(row, 0, wprobeSetName); probeSetName = StringUtils::ConvertWCSToMBS(wprobeSetName); } e.SetProbeSetName(probeSetName); u_int8_t call = 0; entriesGeno->GetData(row, 1, call); e.SetCall(call); float confidence = 0.0; entriesGeno->GetData(row, 2, confidence); e.SetConfidence(confidence); float ras1 = 0.0; entriesGeno->GetData(row, 3, ras1); e.SetRAS1(ras1); float ras2 = 0.0; entriesGeno->GetData(row, 4, ras2); e.SetRAS2(ras2); float aaCall = 0.0; entriesGeno->GetData(row, 5, aaCall); e.SetAACall(aaCall); float abCall = 0.0; entriesGeno->GetData(row, 6, abCall); e.SetABCall(abCall); float bbCall = 0.0; entriesGeno->GetData(row, 7, bbCall); e.SetBBCall(bbCall); float noCall = 0.0; entriesGeno->GetData(row, 8, noCall); e.SetNoCall(noCall); } } void CHPData::GetEntry(int32_t row, 
CHPExpressionEntry& e) { PrepareExprEntryDataSet(); if (entriesExp && entriesExp->IsOpen()) { int colIndex = 0; std::string probeSetName; if (wideProbeSetNames == false) entriesExp->GetData(row, colIndex++, probeSetName); else { std::wstring wprobeSetName; entriesExp->GetData(row, colIndex++, wprobeSetName); probeSetName = StringUtils::ConvertWCSToMBS(wprobeSetName); } e.SetProbeSetName(probeSetName); u_int8_t detection = 0; entriesExp->GetData(row, colIndex++, detection); e.SetDetection(detection); float detectionPValue = 0.0; entriesExp->GetData(row, colIndex++, detectionPValue); e.SetDetectionPValue(detectionPValue); float signal = 0.0; entriesExp->GetData(row, colIndex++, signal); e.SetSignal(signal); u_int16_t numPairs = 0; entriesExp->GetData(row, colIndex++, numPairs); e.SetNumPairs(numPairs); u_int16_t numPairsUsed = 0; entriesExp->GetData(row, colIndex++, numPairsUsed); e.SetNumPairsUsed(numPairsUsed); e.SetHasComparisonData(false); if (entriesExp->Cols() > colIndex) { e.SetHasComparisonData(true); u_int8_t change = 0; entriesExp->GetData(row, colIndex++, change); e.SetChange(change); float changePValue = 0.0; entriesExp->GetData(row, colIndex++, changePValue); e.SetChangePValue(changePValue); float sigLogRatio = 0.0; entriesExp->GetData(row, colIndex++, sigLogRatio); e.SetSigLogRatio(sigLogRatio); float sigLogRatioLo = 0.0; entriesExp->GetData(row, colIndex++, sigLogRatioLo); e.SetSigLogRatioLo(sigLogRatioLo); float sigLogRatioHi = 0.0; entriesExp->GetData(row, colIndex++, sigLogRatioHi); e.SetSigLogRatioHi(sigLogRatioHi); u_int16_t commonPairs = 0; entriesExp->GetData(row, colIndex++, commonPairs); e.SetCommonPairs(commonPairs); } } } void CHPData::GetEntry(int32_t row, CHPUniversalEntry& e) { PrepareUnivEntryDataSet(); if (entriesUniv && entriesUniv->IsOpen()) { float background = 0.0; entriesUniv->GetData(row, 0, background); e.SetBackground(background); } } void CHPData::GetEntry(int32_t row, CHPReseqEntry& e) { PrepareReseqEntryDataSet(); if 
(entriesReseq && entriesReseq->IsOpen()) { int8_t call; entriesReseq->GetData(row, 0, call); e.call=call; entriesReseq->GetData(row, 1, e.score); } } void CHPData::PrepareGenoEntryDataSet() { if (entriesGeno == 0) { entriesGeno = genericData.DataSet(CHP_GENO_GROUP, CHP_GENO_GROUP); if (entriesGeno) { entriesGeno->Open(); wideProbeSetNames = (entriesGeno->Header().GetColumnInfo(0).GetColumnType() == UnicodeCharColType); } } } void CHPData::PrepareExprEntryDataSet() { if (entriesExp == 0) { entriesExp = genericData.DataSet(CHP_EXPR_GROUP, CHP_EXPR_GROUP); if (entriesExp) { entriesExp->Open(); wideProbeSetNames = (entriesExp->Header().GetColumnInfo(0).GetColumnType() == UnicodeCharColType); } } } void CHPData::PrepareUnivEntryDataSet() { if (entriesUniv == 0) { entriesUniv = genericData.DataSet(CHP_UNIV_GROUP, CHP_UNIV_GROUP); if (entriesUniv) { entriesUniv->Open(); } } } void CHPData::PrepareReseqEntryDataSet() { if (entriesReseq == 0) { entriesReseq = genericData.DataSet(CHP_RESEQ_GROUP, CHP_RESEQ_GROUP); if (entriesReseq) { entriesReseq->Open(); } } } void CHPData::AddColumns(DataSetHeader& hdr, bool hasCompData) { if(genericData.Header().GetGenericDataHdr()->GetFileTypeId() == CHP_EXPRESSION_ASSAY_TYPE) AddExprColumns(hdr, hasCompData); if(genericData.Header().GetGenericDataHdr()->GetFileTypeId() == CHP_GENOTYPING_ASSAY_TYPE) AddGenoColumns(hdr); else if(genericData.Header().GetGenericDataHdr()->GetFileTypeId() == CHP_UNIVERSAL_ASSAY_TYPE) AddUnivColumns(hdr); else if(genericData.Header().GetGenericDataHdr()->GetFileTypeId() == CHP_RESEQUENCING_ASSAY_TYPE) AddReseqColumns(hdr); } void CHPData::AddReseqColumns(DataSetHeader& hdr) { hdr.SetName(CHP_RESEQ_GROUP); //call - char hdr.AddByteColumn(CallColName); //Score - float hdr.AddFloatColumn(ScoreColName); } void CHPData::AddUnivColumns(DataSetHeader& hdr) { hdr.SetName(CHP_UNIV_GROUP); //Background - float hdr.AddFloatColumn(BackgroundColName); } void CHPData::AddGenoColumns(DataSetHeader& hdr) { 
hdr.SetName(CHP_GENO_GROUP); //Probeset name - string hdr.AddAsciiColumn(ProbeSetNameColName, maxProbeSetName); //Call - unsigned char hdr.AddUByteColumn(CallColName); //Confidence - float hdr.AddFloatColumn(ConfidenceColName); //RAS1 - float hdr.AddFloatColumn(RAS1ColName); //RAS2 - float hdr.AddFloatColumn(RAS2ColName); //AA Call - float hdr.AddFloatColumn(AAColName); //AB Call - float hdr.AddFloatColumn(ABColName); //BB Call - float hdr.AddFloatColumn(BBColName); //No Call - float hdr.AddFloatColumn(NoCallColName); } void CHPData::AddExprColumns(DataSetHeader& hdr, bool hasCompData) { hdr.SetName(CHP_EXPR_GROUP); //Probeset name - string hdr.AddAsciiColumn(ProbeSetNameColName, maxProbeSetName); //Detection - unsigned char hdr.AddUByteColumn(DetectionColName); //Detection p-value - float hdr.AddFloatColumn(DetectionPValueColName); //Signal - float hdr.AddFloatColumn(SignalColName); //Number of pairs - unsigned short hdr.AddUShortColumn(NumberPairsColName); //Number of pairs used - unsigned short hdr.AddUShortColumn(NumberPairsUsedColName); if (hasCompData == true) { //Change - unsigned char hdr.AddUByteColumn(ChangeColName); //Change p-value - float hdr.AddFloatColumn(ChangePValueColName); //Signal Log Ratio - float hdr.AddFloatColumn(SignalLogRatioColName); //Signal Log Ratio Low - float hdr.AddFloatColumn(SignalLogRatioLowColName); //Signal Log Ratio High - float hdr.AddFloatColumn(SignalLogRatioHighColName); //Common Pairs - unsigned short hdr.AddUShortColumn(CommonPairsColName); } } /// backwards compatibility for gcos like calls std::string CHPData::GetAssayType() { return genericData.Header().GetGenericDataHdr()->GetFileTypeId(); } void CHPData::GetBackgroundZones(int32_t row, int32_t rowCnt, CHPBackgroundZoneVector& zones) { CHPBackgroundZone z; for(int i = row; i < rowCnt; i++) { GetBackgroundZone(i, z); zones.push_back(z); z.Clear(); } } void CHPData::GetBackgroundZone(int32_t row, CHPBackgroundZone& zone) { PrepareBackgroundZoneDataSet(); if (bgZones && 
bgZones->IsOpen()) { float centerX = 0.0; bgZones->GetData(row, 0, centerX); zone.SetCenterX(centerX); float centerY = 0.0; bgZones->GetData(row, 1, centerY); zone.SetCenterY(centerY); float background = 0.0; bgZones->GetData(row, 2, background); zone.SetBackground(background); float smoothFactor = 0.0; bgZones->GetData(row, 3, smoothFactor); zone.SetSmoothFactor(smoothFactor); } } void CHPData::GetForceCall(int32_t row, CHPReseqForceCall& force) { PrepareForceDataSet(); if (forceSet && forceSet->IsOpen()) { forceSet->GetData(row, 0, force.position); int8_t value; forceSet->GetData(row, 1, value); force.call = value; forceSet->GetData(row, 2, value); force.reason = value; } } void CHPData::GetOrigCall(int32_t row, CHPReseqOrigCall &orig) { PrepareOrigDataSet(); if (origSet && origSet->IsOpen()) { origSet->GetData(row, 0, orig.position); int8_t value; origSet->GetData(row, 1, value); orig.call = value; } } CHPData::~CHPData() { Clear(); } void CHPData::Clear() { if (entriesGeno){ entriesGeno->Delete(); entriesGeno = 0; } if (entriesExp){ entriesExp->Delete(); entriesExp = 0; } if (entriesUniv){ entriesUniv->Delete(); entriesUniv = 0; } if (entriesReseq){ entriesReseq->Delete(); entriesReseq = 0; } if (bgZones){ bgZones->Delete(); bgZones = 0; } if (forceSet) { forceSet->Delete(); forceSet = 0; } if (origSet) { origSet->Delete(); origSet = 0; } genericData.Header().Clear(); cachedRows = -1; cachedCols = -1; } void CHPData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } std::string CHPData::GetFilename() const { return ((GenericData&)genericData).Header().GetFilename(); } u_int8_t CHPData::GetVersion() { return genericData.Header().GetVersion(); } u_int8_t CHPData::GetMagic() { return genericData.Header().GetMagicNumber(); } int32_t CHPData::GetEntryCount() { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); DataSetHeader dpHdr = dcHdr->GetDataSet(0); return dpHdr.GetRowCnt(); } void CHPData::SetEntryCount(int32_t ln, int32_t 
maxln, bool hasCompData) { maxProbeSetName = maxln; DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); AddColumns(dpHdr, hasCompData); DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); dcHdr->AddDataSetHdr(dpHdr); } int32_t CHPData::GetBackgroundZoneCnt() { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(1); DataSetHeader dpHdr = dcHdr->GetDataSet(0); return dpHdr.GetRowCnt(); } void CHPData::SetBackgroundZoneCnt(int32_t ln) { DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(CHP_BG_ZONE_GROUP); //center X coord - float dpHdr.AddFloatColumn(CenterXColName); //center Y coord - float dpHdr.AddFloatColumn(CenterYColName); //background - float dpHdr.AddFloatColumn(BackgroundColName); //smoothing factor - float dpHdr.AddFloatColumn(SmoothFactorColName); DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(1); dcHdr->AddDataSetHdr(dpHdr); } int32_t CHPData::GetForceCnt() { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(2); DataSetHeader dpHdr = dcHdr->GetDataSet(0); return dpHdr.GetRowCnt(); } void CHPData::SetForceCnt(int32_t ln) { DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(CHP_RESEQ_FORCE_CALL_GROUP); //position - int dpHdr.AddIntColumn(PositionColName); //call - byte dpHdr.AddByteColumn(ForceCallColName); //reason - byte dpHdr.AddByteColumn(ReasonColName); DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(2); dcHdr->AddDataSetHdr(dpHdr); } int32_t CHPData::GetOrigCnt() { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(3); DataSetHeader dpHdr = dcHdr->GetDataSet(0); return dpHdr.GetRowCnt(); } void CHPData::SetOrigCnt(int32_t ln) { DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(CHP_RESEQ_ORIG_CALL_GROUP); //position - int dpHdr.AddIntColumn(PositionColName); //call - byte dpHdr.AddByteColumn(OriginalCallColName); DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(3); dcHdr->AddDataSetHdr(dpHdr); } std::wstring CHPData::GetArrayType() { return 
GetWStringFromGenericHdr(ARRAY_TYPE_PARAM_NAME); } void CHPData::SetArrayType(const std::wstring& value) { SetWStringToGenericHdr(ARRAY_TYPE_PARAM_NAME, value, ARRAY_TYPE_MAX_LEN); } std::wstring CHPData::GetAlgName() { return GetWStringFromGenericHdr(ALGORITHM_NAME_PARAM_NAME); } void CHPData::SetAlgName(const std::wstring& value) { SetWStringToGenericHdr(ALGORITHM_NAME_PARAM_NAME, value); } std::wstring CHPData::GetParentCell() { return GetWStringFromGenericHdr(CHP_PARENT_CELL); } void CHPData::SetParentCell(const std::wstring& value) { SetWStringToGenericHdr(CHP_PARENT_CELL, value); } std::wstring CHPData::GetAlgVersion() { return GetWStringFromGenericHdr(ALG_VERSION_PARAM_NAME); } void CHPData::SetAlgVersion(const std::wstring& value) { SetWStringToGenericHdr(ALG_VERSION_PARAM_NAME, value); } std::wstring CHPData::GetProgId() { return GetWStringFromGenericHdr(CHP_PROGID); } void CHPData::SetProgId(const std::wstring& value) { SetWStringToGenericHdr(CHP_PROGID, value); } ParameterNameValueType CHPData::GetAlgParam(const std::wstring& tag) { std::wstring name = ALGORITHM_PARAM_NAME_PREFIX_S + tag; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->FindNameValParam(name, paramType); ParameterNameValueType type = paramType; type.SetName(tag); return type; } ParameterNameValueType CHPData::GetChipSum(const std::wstring& tag) { std::wstring name = CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S + tag; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->FindNameValParam(name, paramType); ParameterNameValueType type = paramType; type.SetName(tag); return type; } ParameterNameValueTypeVector CHPData::GetAlgParams() { ParameterNameValueTypeVector nvt; ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, 
ALGORITHM_PARAM_NAME_PREFIX_S.size(),ALGORITHM_PARAM_NAME_PREFIX_S) == 0) { param = *begin; key.erase(0, ALGORITHM_PARAM_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPData::AddAlgParam(const std::wstring& name, const std::wstring& param) { std::wstring paramName = ALGORITHM_PARAM_NAME_PREFIX_S + name; SetWStringToGenericHdr(paramName, param); } void CHPData::AddAlgParam(const std::wstring& name, int param) { std::wstring paramName = ALGORITHM_PARAM_NAME_PREFIX_S + name; SetInt32ToGenericHdr(paramName, param); } void CHPData::AddAlgParam(const std::wstring& name, float param) { std::wstring paramName = ALGORITHM_PARAM_NAME_PREFIX_S + name; SetFloatToGenericHdr(paramName, param); } ParameterNameValueTypeVector CHPData::GetChipSums() { ParameterNameValueTypeVector nvt; ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size(),CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S) == 0) { param = *begin; key.erase(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPData::AddChipSum(const std::wstring& name, const std::wstring& param) { std::wstring paramName = CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S + name; SetWStringToGenericHdr(paramName, param); } void CHPData::AddChipSum(const std::wstring& name, float param) { std::wstring paramName = CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S + name; SetFloatToGenericHdr(paramName, param); } void CHPData::AddChipSum(const std::wstring& name, int32_t param) { std::wstring paramName = CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S + name; SetInt32ToGenericHdr(paramName, param); } int32_t CHPData::GetRows() { if (cachedRows == -1) cachedRows = GetInt32FromGenericHdr(CEL_ROWS_PARAM_NAME); return cachedRows; } void CHPData::SetRows(int32_t 
value) { SetInt32ToGenericHdr(CEL_ROWS_PARAM_NAME, value); cachedCols = value; } int32_t CHPData::GetCols() { if (cachedCols == -1) cachedCols = GetInt32FromGenericHdr(CEL_COLS_PARAM_NAME); return cachedCols; } void CHPData::SetCols(int32_t value) { SetInt32ToGenericHdr(CEL_COLS_PARAM_NAME, value); cachedCols = value; } std::wstring CHPData::GetWStringFromGenericHdr(const std::wstring& name) { std::wstring result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueText(); } return result; } void CHPData::SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueText(value, reserve); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } int32_t CHPData::GetInt32FromGenericHdr(const std::wstring& name) { int32_t result = 0; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueInt32(); } return result; } void CHPData::SetInt32ToGenericHdr(const std::wstring& name, int32_t value) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueInt32(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } void CHPData::SetFloatToGenericHdr(const std::wstring& name, float value) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueFloat(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } void CHPData::PrepareBackgroundZoneDataSet() { if (bgZones == 0) { bgZones = genericData.DataSet(CHP_BG_ZONE_GROUP, CHP_BG_ZONE_GROUP); if (bgZones) { bgZones->Open(); } } } void CHPData::PrepareForceDataSet() { if (forceSet == 0) { forceSet 
= genericData.DataSet(CHP_RESEQ_FORCE_CALL_GROUP, CHP_RESEQ_FORCE_CALL_GROUP); if (forceSet) { forceSet->Open(); } } } void CHPData::PrepareOrigDataSet() { if (origSet == 0) { origSet = genericData.DataSet(CHP_RESEQ_ORIG_CALL_GROUP, CHP_RESEQ_ORIG_CALL_GROUP); if (origSet) { origSet->Open(); } } } affxparser/src/fusion/calvin_files/data/src/CHPData.h0000644000175200017520000002471414516003651023553 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPData_HEADER_ #define _CHPData_HEADER_ /*! \file CHPData.h Defines a class to use as a base class for older CHP file data (expression, genotyping, reseq, universal). */ #include "calvin_files/data/src/CHPBackgroundZone.h" #include "calvin_files/data/src/CHPExpressionEntry.h" #include "calvin_files/data/src/CHPGenotypeEntry.h" #include "calvin_files/data/src/CHPReseqEntry.h" #include "calvin_files/data/src/CHPUniversalEntry.h" #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/Coords.h" // #include #include // namespace affymetrix_calvin_io { /*! 
The id for the expression CHP files. */ #define CHP_EXPRESSION_ASSAY_TYPE std::string("affymetrix-expression-probeset-analysis") /*! The id for the expression CHP file data group. */ #define CHP_EXPR_GROUP std::wstring(L"Expression Results") /*! The id for the resequencing CHP files. */ #define CHP_RESEQUENCING_ASSAY_TYPE std::string("affymetrix-resequencing-probeset-analysis") /*! The id for the resequencing CHP file data group. */ #define CHP_RESEQ_GROUP std::wstring(L"Resequencing Results") /*! The id for the genotyping CHP files. */ #define CHP_GENOTYPING_ASSAY_TYPE std::string("affymetrix-genotyping-probeset-analysis") /*! The id for the genotyping CHP file data group. */ #define CHP_GENO_GROUP std::wstring(L"Genotyping Results") /*! The id for the universal CHP files. */ #define CHP_UNIVERSAL_ASSAY_TYPE std::string("affymetrix-universal-probeset-analysis") /*! The id for the universal CHP file data group. */ #define CHP_UNIV_GROUP std::wstring(L"Universal Results") /*! The id for the prog ID. */ #define CHP_PROGID std::wstring(L"affymetrix-progid") /*! The id for the parent cel file. */ #define CHP_PARENT_CELL std::wstring(L"affymetrix-parent-celfile") /*! The group name for the background zone group. */ #define CHP_BG_ZONE_GROUP std::wstring(L"Background Zone Data") /*! The group name for the force call group (for resequencing only). */ #define CHP_RESEQ_FORCE_CALL_GROUP std::wstring(L"Force Call Data") /*! The group name for the orig call group (for resequencing only). */ #define CHP_RESEQ_ORIG_CALL_GROUP std::wstring(L"Orig Call Data") /*! Defines a base class for older CHP file data. */ class CHPData { public: /*! Constructor */ CHPData(); /*! Constructor with file and type. * @param filename The name of the CHP file. * @param assayType The type of data in the CHP file. */ CHPData(const std::string& filename, const std::string &assayType); /*! Destructor */ virtual ~CHPData(); protected: /*! 
Flag indicating if the probe set names were stored in wide character format. */ bool wideProbeSetNames; /*! The maximum length of a probe set name. */ int maxProbeSetName; /*! The generic file data object. */ GenericData genericData; /*! keep rows from being read from the header all the time */ int32_t cachedRows; /*! keep cols from being read from the header all the time */ int32_t cachedCols; /*! expression entries DataSet */ DataSet* entriesExp; /*! genotyping entries DataSet */ DataSet* entriesGeno; /*! universal entries DataSet */ DataSet* entriesUniv; /*! resequencing entries DataSet */ DataSet* entriesReseq; /*! chp background zones DataSet */ DataSet* bgZones; /*! chp force call DataSet */ DataSet* forceSet; /*! chp orig DataSet */ DataSet* origSet; public: /*! Clears the members. */ void Clear(); /*! Sets the name of the CHP file. */ void SetFilename(const std::string &p); /*! The name of the CHP file. */ std::string GetFilename() const; /*! Gets the version in the file. */ u_int8_t GetVersion(); /*! Gets the files magic number. */ u_int8_t GetMagic(); /*! Gets the number of rows of features on the array. */ int32_t GetRows(); /*! Sets the number of rows of features on the array. */ void SetRows(int32_t value); /*! Gets the number of columns of features on the array. */ int32_t GetCols(); /*! Sets the number of columns of features on the array. */ void SetCols(int32_t value); /*! Gets the CHP file prog Id. */ std::wstring GetProgId(); /*! Sets the prog ID for the CHP file. */ void SetProgId(const std::wstring& value); /*! Sets the array type */ std::wstring GetArrayType(); /*! Gets the assay type */ std::string GetAssayType(); /*! Sets the array type. */ void SetArrayType(const std::wstring& value); /*! Gets the algorithm name */ std::wstring GetAlgName(); /*! Sets the algorithm name. */ void SetAlgName(const std::wstring& value); /*! Gets the name of the parent CEL file. */ std::wstring GetParentCell(); /*! 
Sets the name of the parent CEL file */ void SetParentCell(const std::wstring& value); /*! Gets the algorithm version. */ std::wstring GetAlgVersion(); /*! Sets the algorithm version. */ void SetAlgVersion(const std::wstring& value); /*! Adds a parameter to the alg parameters section */ void AddAlgParam(const std::wstring& name, const std::wstring& param); /*! Adds a parameter to the alg parameters section */ void AddAlgParam(const std::wstring& name, int param); /*! Adds a parameter to the alg parameters section */ void AddAlgParam(const std::wstring& name, float param); /*! Gets the alg parameters */ ParameterNameValueTypeVector GetAlgParams(); /*! Gets a single algorithm parameter by name. */ ParameterNameValueType GetAlgParam(const std::wstring& tag); /*! Adds a parameter to the chip summary section */ void AddChipSum(const std::wstring& name, const std::wstring& param); /*! Adds a parameter to the chip summary section */ void AddChipSum(const std::wstring& name, float param); /*! Adds a parameter to the chip summary section */ void AddChipSum(const std::wstring& name, int param); /*! Gets all the chip summary parameters */ ParameterNameValueTypeVector GetChipSums(); /*! Gets a chip summary parameter by name */ ParameterNameValueType GetChipSum(const std::wstring& tag); /*! Gets the file header */ FileHeader* GetFileHeader() { return &genericData.Header(); } /*! Gets the file data object. */ GenericData& GetGenericData() { return genericData; } // should be a friend method only /*! Sets the number of entries (probe sets) * @param ln The number of probe sets. * @param maxln The maximum length of a probe set name. * @param hasCompData A flag for expression results, true if comparison data exists. */ void SetEntryCount(int32_t ln, int32_t maxln, bool hasCompData = false); /*! Gets the number of entries (probe sets) */ int32_t GetEntryCount(); /*! Gets the number of background zones. */ int32_t GetBackgroundZoneCnt(); /*! Sets the number of background zones. 
*/ void SetBackgroundZoneCnt(int32_t ln); /*! Gets CHP background zone value * @param row The row from which to start copying * @param zone The data object to be filled */ void GetBackgroundZone(int32_t row, CHPBackgroundZone& zone); /*! Gets the background zones. */ void GetBackgroundZones(int32_t row, int32_t rowCnt, CHPBackgroundZoneVector& zones); /*! Gets the expression entry (probe set). */ void GetEntry(int32_t row, CHPUniversalEntry& e); /*! Gets the expression entry (probe set). */ void GetEntry(int32_t row, CHPGenotypeEntry& e); /*! Gets the expression entry (probe set). */ void GetEntry(int32_t row, CHPExpressionEntry& e); /*! Gets the resequence entry. */ void GetEntry(int32_t row, CHPReseqEntry& e); /*! Gets the number of force calls. */ int32_t GetForceCnt(); /*! Sets the number of force calls. */ void SetForceCnt(int32_t ln); /*! Gets the force call value * @param row The row index * @param force The data object to be filled */ void GetForceCall(int32_t row, CHPReseqForceCall& force); /*! Gets the number of orig calls. */ int32_t GetOrigCnt(); /*! Sets the number of orig calls. */ void SetOrigCnt(int32_t ln); /*! Gets the original call value from the orig set. * @param row The row index * @param orig The orginal call value. */ void GetOrigCall(int32_t row, CHPReseqOrigCall &orig); /*! The maximum length of a probe set name. */ int GetMaxProbeSetName() const { return maxProbeSetName; } protected: /*! Prepares the data set. */ void PrepareGenoEntryDataSet(); /*! Prepares the data set. */ void PrepareExprEntryDataSet(); /*! Prepares the data set. */ void PrepareUnivEntryDataSet(); /*! Prepares the data set. */ void PrepareReseqEntryDataSet(); /*! Adds columns to the data set. */ void AddUnivColumns(DataSetHeader& hdr); /*! Adds columns to the data set. */ void AddExprColumns(DataSetHeader& hdr, bool hasCompData); /*! Adds columns to the data set. */ void AddGenoColumns(DataSetHeader& hdr); /*! Adds columns to the data set. 
*/ void AddReseqColumns(DataSetHeader& hdr); /*! Gets a string parameter to the header. */ std::wstring GetWStringFromGenericHdr(const std::wstring& name); /*! Adds a string parameter to the header. */ void SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve=-1); /*! Gets an integer parameter from the header. */ int32_t GetInt32FromGenericHdr(const std::wstring& name); /*! Adds an integer parameter to the header. */ void SetInt32ToGenericHdr(const std::wstring& name, int32_t value); /*! Adds an float parameter to the header. */ void SetFloatToGenericHdr(const std::wstring& name, float value); /*! Prepares the data set for the bg set. */ void PrepareBackgroundZoneDataSet(); /*! Prepares the data set for the force call set. */ void PrepareForceDataSet(); /*! Prepares the data set for the orig call set. */ void PrepareOrigDataSet(); /*! Adds columns to the data set. */ void AddColumns(DataSetHeader& hdr, bool hasCompData); }; } #endif affxparser/src/fusion/calvin_files/data/src/CHPExpressionEntry.cpp0000644000175200017520000000601314516003651026406 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPExpressionEntry.h" // using namespace affymetrix_calvin_io; CHPExpressionEntry::CHPExpressionEntry() { Clear(); } CHPExpressionEntry::CHPExpressionEntry(const std::string& psName, u_int8_t detect, float detectPValue, float sig, u_int16_t nPairs, u_int16_t nPairsUsed, bool compData, u_int8_t chg, float chgPValue, float sLogRatio, float sLogRatioLo, float sLogRatioHi, u_int16_t commonPrs) { probeSetName = psName; detection = detect; detectionPValue = detectPValue; signal = sig; numPairs = nPairs; numPairsUsed = nPairsUsed; hasComparisonData = compData; change = chg; changePValue = chgPValue; sigLogRatio = sLogRatio; sigLogRatioLo = sLogRatioLo; sigLogRatioHi = sLogRatioHi; commonPairs = commonPrs; } CHPExpressionEntry::CHPExpressionEntry(const std::string& psName, u_int8_t detect, float detectPValue, float sig, u_int16_t nPairs, u_int16_t nPairsUsed) { probeSetName = psName; detection = detect; detectionPValue = detectPValue; signal = sig; numPairs = nPairs; numPairsUsed = nPairsUsed; hasComparisonData = false; change = 0; changePValue = 0; sigLogRatio = 0; sigLogRatioLo = 0; sigLogRatioHi = 0; commonPairs = 0; } CHPExpressionEntry::~CHPExpressionEntry() {} void CHPExpressionEntry::Clear() { probeSetName.clear(); detection = 0; detectionPValue = 0; signal = 0; numPairs = 0; numPairsUsed = 0; hasComparisonData = false; change = 0; changePValue = 0; sigLogRatio = 0; sigLogRatioLo = 0; sigLogRatioHi = 0; commonPairs = 0; } CHPExpressionEntry CHPExpressionEntry::operator=(CHPExpressionEntry e) { probeSetName = e.GetProbeSetName(); detection = e.GetDetection(); detectionPValue = e.GetDetectionPValue(); signal = e.GetSignal(); numPairs = e.GetNumPairs(); 
numPairsUsed = e.GetNumPairsUsed(); hasComparisonData = e.GetHasComparisonData(); change = e.GetChange(); changePValue = e.GetChangePValue(); sigLogRatio = e.GetSigLogRatio(); sigLogRatioLo = e.GetSigLogRatioLo(); sigLogRatioHi = e.GetSigLogRatioHi(); commonPairs = e.GetCommonPairs(); return *this; } affxparser/src/fusion/calvin_files/data/src/CHPExpressionEntry.h0000644000175200017520000001527514516003651026065 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPExpressionEntry_HEADER_ #define _CHPExpressionEntry_HEADER_ /*! \file CHPExpressionEntry.h Defines a class to store the expression analyis results. */ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // namespace affymetrix_calvin_io { /*! This class stores the expression probe set analysis results. */ class CHPExpressionEntry { private: /*! The probe set name. */ std::string probeSetName; /*! The detection call. */ u_int8_t detection; /*! The detection p-value. */ float detectionPValue; /*! The signal. */ float signal; /*! The number of probe pairs in the probe set. */ u_int16_t numPairs; /*! The number of probe pairs used in the analysis. 
*/ u_int16_t numPairsUsed; /*! A flag to indicate if comparison data exists. */ bool hasComparisonData; /*! The change call. */ u_int8_t change; /*! The change p-value. */ float changePValue; /*! The signal log ratio. */ float sigLogRatio; /*! The signal log ratio low. */ float sigLogRatioLo; /*! The signal log ratio high. */ float sigLogRatioHi; /*! The number of probe pairs in common between control and experiment. */ u_int16_t commonPairs; public: /*! Constructor. */ CHPExpressionEntry(); /*! Constructor with absolute and comparison data. * @param probeSetname The probe set name * @param detection The detection * @param detectionPValue The detection p-value * @param signal The signal * @param numPairs The number of probe pairs in the set. * @param numPairsUsed The number of probe pairs used in the analysis * @param compData Flag indicating if comp data exists. * @param change The change call * @param changePValue The change p-value * @param sigLogRatio The signal log ratio * @param sigLogRatioLo The signal log ratio low * @param sigLogRatioHi The signal log ratio high * @param commonPairs The number of probe pairs in common between control and experiment */ CHPExpressionEntry(const std::string& probeSetname, u_int8_t detection, float detectionPValue, float signal, u_int16_t numPairs, u_int16_t numPairsUsed, bool compData, u_int8_t change, float changePValue, float sigLogRatio, float sigLogRatioLo, float sigLogRatioHi, u_int16_t commonPairs); /*! Constructor with absolute data only. * @param probeSetname The probe set name * @param detection The detection * @param detectionPValue The detection p-value * @param signal The signal * @param numPairs The number of probe pairs in the set. * @param numPairsUsed The number of probe pairs used in the analysis */ CHPExpressionEntry(const std::string& probeSetname, u_int8_t detection, float detectionPValue, float signal, u_int16_t numPairs, u_int16_t numPairsUsed); /*! Destructor */ ~CHPExpressionEntry(); /*! 
Clears the members. */ void Clear(); /*! Assignment operator * @param e The entry to copy * @return The new object */ CHPExpressionEntry operator=(CHPExpressionEntry e); /*! Gets the probe set name. */ std::string GetProbeSetName() const { return probeSetName; } /*! Get the detection call. */ u_int8_t GetDetection() const { return detection; } /*! Gets the detection p-value. */ float GetDetectionPValue() const { return detectionPValue; } /*! Gets the signal value. */ float GetSignal() const { return signal; } /*! Gets the number of probe pairs in the set. */ u_int16_t GetNumPairs() const { return numPairs; } /*! Gets the number of probe pairs used in the analysis. */ u_int16_t GetNumPairsUsed() const { return numPairsUsed; } /*! A flag to indicate if comparison data exists. */ bool GetHasComparisonData() const { return hasComparisonData; } /*! Gets the change call. */ u_int8_t GetChange() const { return change; } /*! Gets the change p-value. */ float GetChangePValue() const { return changePValue; } /*! Gets the signal log ratio. */ float GetSigLogRatio() const { return sigLogRatio; } /*! Gets the signal log ratio low. */ float GetSigLogRatioLo() const { return sigLogRatioLo; } /*! Gets the signal log ratio high. */ float GetSigLogRatioHi() const { return sigLogRatioHi; } /*! Gets the number of probe pairs in common between control and experiment. */ u_int16_t GetCommonPairs() const { return commonPairs; } /*! Sets the probe set name. */ void SetProbeSetName(const std::string& p) { probeSetName = p; } /*! Sets the detection value. */ void SetDetection(u_int8_t p) { detection = p; } /*! Sets the detection p-value. */ void SetDetectionPValue(float p) { detectionPValue = p; } /*! Sets the signal value. */ void SetSignal(float p) { signal = p; } /*! Sets the number of pairs in the probe sets. */ void SetNumPairs(u_int16_t p) { numPairs = p; } /*! Sets the number of pairs used in the analysis. */ void SetNumPairsUsed(u_int16_t p) { numPairsUsed = p; } /*! 
Sets the flag to indicate if comparison data exists. */ void SetHasComparisonData(bool b) { hasComparisonData = b; } /*! Sets the change call. */ void SetChange(u_int8_t p) { change = p; } /*! Sets the change p-value. */ void SetChangePValue(float p) { changePValue = p; } /*! Sets the signal log ratio. */ void SetSigLogRatio(float p) { sigLogRatio = p; } /*! Sets the signal log ratio low. */ void SetSigLogRatioLo(float p) { sigLogRatioLo = p; } /*! Sets the signal log ratio high. */ void SetSigLogRatioHi(float p) { sigLogRatioHi = p; } /*! Sets the number of probe pairs in common between control and experiment. */ void SetCommonPairs(u_int16_t p) { commonPairs = p; } }; /*! An STL list of zones */ typedef std::vector CHPExpressionEntryVector; /*! iterator of CHPExpressionEntrys */ typedef std::vector::iterator CHPExpressionEntryVectorIt; } #endif // _CHPExpressionEntry_HEADER_ affxparser/src/fusion/calvin_files/data/src/CHPGenotypeEntry.cpp0000644000175200017520000000617414516003651026051 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPGenotypeEntry.h" // using namespace affymetrix_calvin_io; CHPGenotypeEntry::CHPGenotypeEntry() { Clear(); } CHPGenotypeEntry::CHPGenotypeEntry(const std::string& psName, u_int8_t c, float conf, float ras1, float ras2, float aa, float ab, float bb, float no) { probeSetName = psName; call = c; confidence = conf; RAS1 = ras1; RAS2 = ras2; aaCall = aa; abCall = ab; bbCall = bb; noCall = no; } CHPGenotypeEntry::~CHPGenotypeEntry() {} void CHPGenotypeEntry::Clear() { probeSetName.clear(); call = 0; confidence = 0; RAS1 = 0; RAS2 = 0; aaCall = 0; abCall = 0; bbCall = 0; noCall = 0; } CHPGenotypeEntry CHPGenotypeEntry::operator=(CHPGenotypeEntry zn) { probeSetName = zn.GetProbeSetName(); call = zn.GetCall(); confidence = zn.GetConfidence(); RAS1 = zn.GetRAS1(); RAS2 = zn.GetRAS2(); aaCall = zn.GetAACall(); abCall = zn.GetABCall(); bbCall = zn.GetBBCall(); noCall = zn.GetNoCall(); return *this; } std::string CHPGenotypeEntry::GetProbeSetName() const { return probeSetName; } u_int8_t CHPGenotypeEntry::GetCall() const { return call; } float CHPGenotypeEntry::GetConfidence() const { return confidence; } float CHPGenotypeEntry::GetRAS1() const { return RAS1; } float CHPGenotypeEntry::GetRAS2() const { return RAS2; } float CHPGenotypeEntry::GetAACall() const { return aaCall; } float CHPGenotypeEntry::GetABCall() const { return abCall; } float CHPGenotypeEntry::GetBBCall() const { return bbCall; } float CHPGenotypeEntry::GetNoCall() const { return noCall; } void CHPGenotypeEntry::SetProbeSetName(const std::string& p) { probeSetName = p; } void CHPGenotypeEntry::SetCall(u_int8_t p) { call = p; } void CHPGenotypeEntry::SetConfidence(float p) { confidence = p; } void 
CHPGenotypeEntry::SetRAS1(float p) { RAS1 = p; } void CHPGenotypeEntry::SetRAS2(float p) { RAS2 = p; } void CHPGenotypeEntry::SetAACall(float p) { aaCall = p; } void CHPGenotypeEntry::SetABCall(float p) { abCall = p; } void CHPGenotypeEntry::SetBBCall(float p) { bbCall = p; } void CHPGenotypeEntry::SetNoCall(float p) { noCall = p; } affxparser/src/fusion/calvin_files/data/src/CHPGenotypeEntry.h0000644000175200017520000000512514516003651025511 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPGenotypeEntry_HEADER_ #define _CHPGenotypeEntry_HEADER_ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! 
This class stores a genotype result */ class CHPGenotypeEntry { private: std::string probeSetName; u_int8_t call; float confidence; float RAS1; float RAS2; float aaCall; float abCall; float bbCall; float noCall; public: CHPGenotypeEntry(); CHPGenotypeEntry(const std::string& probeSetname, u_int8_t call, float confidence, float RAS1, float RAS2, float AACall, float ABCall, float BBCall, float NoCall); ~CHPGenotypeEntry(); void Clear(); /*! Assignment operator * @param zn The zone to copy * @return The new zone object */ CHPGenotypeEntry operator=(CHPGenotypeEntry zn); std::string GetProbeSetName() const; u_int8_t GetCall() const; float GetConfidence() const; float GetRAS1() const; float GetRAS2() const; float GetAACall() const; float GetABCall() const; float GetBBCall() const; float GetNoCall() const; void SetProbeSetName(const std::string& p); void SetCall(u_int8_t p); void SetConfidence(float p); void SetRAS1(float p); void SetRAS2(float p); void SetAACall(float p); void SetABCall(float p); void SetBBCall(float p); void SetNoCall(float p); }; /*! An STL list of zones */ typedef std::vector CHPGenotypeEntryVector; /*! iterator of CHPGenotypeEntrys */ typedef std::vector::iterator CHPGenotypeEntryVectorIt; } #endif // _CHPGenotypeEntry_HEADER_ affxparser/src/fusion/calvin_files/data/src/CHPMultiDataData.cpp0000644000175200017520000013103314516003651025704 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2009 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPMultiDataData.h" // #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/utils/src/StringUtils.h" // using namespace std; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; /*! The data set name. */ const static std::wstring MULTI_DATA_NAME = L"MultiData"; /*! The column name for the probe set name. */ const static std::wstring PROBE_SET_NAME = L"ProbeSetName"; /*! The column name for the cyto region. */ const static std::wstring CYTO_REGION_NAME = L"Region"; /*! The column name for the call. */ const static std::wstring GENOTYPE_CALL_NAME = L"Call"; /*! The column name for the confidence value. */ const static std::wstring GENOTYPE_CONFIDENCE_NAME = L"Confidence"; /*! The column name for the quantification value. */ const static std::wstring EXPRESSION_QUANTIFICATION_NAME = L"Quantification"; /*! The column name for the chromosome name. */ const static std::wstring COPY_NUMBER_CHR_NAME = L"Chromosome"; /*! The column name for the physical position of the SNP. */ const static std::wstring COPY_NUMBER_POSITION_NAME = L"Position"; /*! The column name for the call value. */ const static std::wstring CYTO_CALL_NAME = L"Call"; /*! The column name for the confidence value. */ const static std::wstring CYTO_CONFIDENCE_NAME = L"Confidence"; /*! The column name for the chromosome name. */ const static std::wstring CYTO_CHR_NAME = L"Chromosome"; /*! The column name for the physical position of the cyto region. */ const static std::wstring CYTO_START_POSITION_NAME = L"StartPosition"; /*! The column name for the physical position of the cyto region. 
*/ const static std::wstring CYTO_STOP_POSITION_NAME = L"StopPosition"; /*! The column name for the copy number variation region. */ const static std::wstring COPY_NUMBER_VARIATION_REGION_NAME = L"Region"; /*! The column name for the copy number variation signal. */ const static std::wstring COPY_NUMBER_VARIATION_SIGNAL_NAME = L"Signal"; /*! The column name for the copy number variation call. */ const static std::wstring COPY_NUMBER_VARIATION_CALL_NAME = L"Call"; /*! The column name for the copy number variation confidence value. */ const static std::wstring COPY_NUMBER_VARIATION_CONFIDENCE_NAME = L"Confidence"; /*! The column name for the region. */ const static std::wstring REGION = L"Region"; /*! The column name for the call. */ const static std::wstring CALL = L"Call"; /*! The column name for the copy number (CN) call. */ const static std::wstring CN_CALL = L"CN Call"; /*! The column name for the confidence value. */ const static std::wstring CONFIDENCE = L"Confidence"; /*! The column name for the copy number (CN) confidence value. */ const static std::wstring CN_CONFIDENCE = L"CN Confidence"; /*! The column name for the quantification value. */ const static std::wstring QUANTIFICATION = L"Quantification"; /*! The column name for the chromosome name. */ const static std::wstring CHR = L"Chromosome"; /*! The column name for the physical position of the SNP. */ const static std::wstring POSITION = L"Position"; /*! The column name for the start position of a region or segment. */ const static std::wstring START_POSITION = L"StartPosition"; /*! The column name for the stop position of a region or segment. */ const static std::wstring STOP_POSITION = L"StopPosition"; /*! The column name for the signal value. */ const static std::wstring SIGNAL = L"Signal"; /*! The column name for the forced call. */ const static std::wstring FORCE = L"Forced Call"; /*! The column name for the copy number (CN) force value. */ const static std::wstring CN_FORCE = L"CN_force"; /*! The column name for the estimate.
*/ const static std::wstring CN_ESTIMATE = L"CN_estim"; /*! The column name for the lower value (presumably the lower bound of the CN estimate - TODO confirm). */ const static std::wstring CN_LOWER = L"CN_lower"; /*! The column name for the upper value (presumably the upper bound of the CN estimate - TODO confirm). */ const static std::wstring CN_UPPER = L"CN_upper"; /*! The column name for the allele count. */ const static std::wstring ALLELE_COUNT = L"Allele Count"; /*! The column name for the mean marker distance. */ const static std::wstring MEAN_MARKER_DISTANCE = L"MeanMarkerDistance"; /*! The column names for signals A through F. */ const static std::wstring SIGNAL_A = L"Signal A"; const static std::wstring SIGNAL_B = L"Signal B"; const static std::wstring SIGNAL_C = L"Signal C"; const static std::wstring SIGNAL_D = L"Signal D"; const static std::wstring SIGNAL_E = L"Signal E"; const static std::wstring SIGNAL_F = L"Signal F"; /*! The column names for contexts A through F. */ const static std::wstring CONTEXT_A = L"Context A"; const static std::wstring CONTEXT_B = L"Context B"; const static std::wstring CONTEXT_C = L"Context C"; const static std::wstring CONTEXT_D = L"Context D"; const static std::wstring CONTEXT_E = L"Context E"; const static std::wstring CONTEXT_F = L"Context F"; /*! Columns for chromosome summary - chromosome display. */ const static std::wstring CHR_DISPLAY = L"Display"; /*! Columns for chromosome summary - start index. */ const static std::wstring START_INDEX = L"StartIndex"; /*! Columns for chromosome summary - min signal. */ const static std::wstring MIN_SIGNAL = L"MinSignal"; /*! Columns for chromosome summary - max signal. */ const static std::wstring MAX_SIGNAL = L"MaxSignal"; /*! Columns for chromosome summary - median cn state. */ const static std::wstring MEDIAN_CN_STATE = L"MedianCnState"; /*! Columns for chromosome summary - hom frequency. */ const static std::wstring HOM_FREQ = L"HomFrequency"; /*! Columns for chromosome summary - het frequency. */ const static std::wstring HET_FREQ = L"HetFrequency"; /*! Columns for chromosome summary - mosaicism.
*/ const static std::wstring MOSAICISM = L"Mosaicism"; /*! The segment id tag. */ const static std::wstring SEGMENT_ID = L"SegmentID"; /*! The marker count tag. */ const static std::wstring MARKER_COUNT = L"MarkerCount"; /*! The mixture tag. */ const static std::wstring MIXTURE = L"Mixture"; /*! The calibrated CN tag. */ const static std::wstring CALIBRATED_CN = L"CalibratedCN"; /*! Familial tags - segment type. */ const static std::wstring SEGMENT_TYPE = L"SegmentType"; /*! Familial tags - reference sample key. */ const static std::wstring REFERENCE_SAMPLE_KEY = L"ReferenceSampleKey"; /*! Familial tags - reference segment id. */ const static std::wstring REFERENCE_SEGMENT_ID = L"ReferenceSegmentID"; /*! Familial tags - familial sample key. */ const static std::wstring FAMILIAL_SAMPLE_KEY = L"FamilialSampleKey"; /*! Familial tags - familial segment id. */ const static std::wstring FAMILIAL_SEGMENT_ID = L"FamilialSegmentID"; /*! Column names of the familial samples data set (sample key, ARR id, CHP id, CHP file name, role, role validity, role confidence). */ const static std::wstring SAMPLE_KEY = L"SampleKey"; const static std::wstring ARRID = L"ARRID"; const static std::wstring CHPID = L"CHPID"; const static std::wstring CHP_FILENAME = L"CHPFilename"; const static std::wstring ROLE = L"Role"; const static std::wstring ROLE_VALIDITY = L"RoleValidity"; const static std::wstring ROLE_CONFIDENCE = L"RoleConfidence"; /*! Names for CN, LOH, CN-neutral LOH, normal diploid and no-call; presumably segment type labels (TODO confirm, no use is visible in this part of the file). */ const static std::wstring CN = L"CN"; const static std::wstring LOH = L"LOH"; const static std::wstring CNNEUTRALLOH = L"CNNeutralLOH"; const static std::wstring NORMALDIPLOID = L"NormalDiploid"; const static std::wstring NOCALL = L"NoCall"; /*! The column names for the homozygosity and heterozygosity values. */ const static std::wstring HOMOZYGOSITY = L"Homozygosity"; const static std::wstring HETEROZYGOSITY = L"Heterozygosity"; /*! The column name for the probe set index. */ const static std::wstring PROBE_SET_INDEX = L"Index"; /*! The column names for the A signal, B signal, SCAR, contrast and signal strength values. */ const static std::wstring A_SIGNAL = L"ASignal"; const static std::wstring B_SIGNAL = L"BSignal"; const static std::wstring SCAR = L"SCAR"; const static std::wstring CONTRAST = L"Contrast"; const static std::wstring SIGNAL_STRENGTH = L"SignalStrength"; /*!
constructor */ DataSetInfo::DataSetInfo() { entries = NULL; maxName = -1; maxSegmentType = -1; maxReferenceSegmentID = -1; maxFamilialSegmentID = -1; maxFamilialARRID = -1; maxFamilialCHPID = -1; maxFamilialCHPFile = -1; maxFamilialRole = -1; maxFamilialCHPFile = -1; dataSetIndex = -1; } std::wstring CHPMultiDataData::GetGroupName(MultiDataType dataType) { return dataTypeGroupNames[dataType]; } /*! The data set information */ std::map &CHPMultiDataData::GetDataSetInfo() { return dataSetInfo; } /*! used for full column index for log2Ratio */ const static int cnlog2RatioIndexOffset = 4; CHPMultiDataData::CHPMultiDataData() { Clear(); } CHPMultiDataData::CHPMultiDataData(const std::string& filename, const std::list *groupNames) { Clear(); SetFilename(filename); if (groupNames == NULL) { DataGroupHeader dcHdr(MULTI_DATA_NAME); genericData.Header().AddDataGroupHdr(dcHdr); genericData.Header().SetNumDataGroups(1); } else { std::list::const_iterator it; for (it = groupNames->begin(); it != groupNames->end(); it++) { DataGroupHeader dcHdr(*it); genericData.Header().AddDataGroupHdr(dcHdr); } genericData.Header().SetNumDataGroups((int)groupNames->size()); } genericData.Header().GetGenericDataHdr()->SetFileTypeId(CHP_MULTI_DATA_TYPE); } CHPMultiDataData::~CHPMultiDataData() { Clear(); } DataSetHeader *CHPMultiDataData::GetDataSetHeader(MultiDataType dataType) { int ng = genericData.Header().GetNumDataGroups(); for (int ig=0; ig::iterator it=dataSetInfo.begin(); it!=dataSetInfo.end(); ++it) { DataSetInfo &info = it->second; info.metricColumns.clear(); if (info.entries){ info.entries->Delete(); info.entries = 0; } } dataSetInfo.clear(); dataTypeGroupNames.clear(); genericData.Header().Clear(); } /*! Gets the file header. * @return The file header. */ FileHeader* CHPMultiDataData::GetFileHeader() { return &genericData.Header(); } /*! Gets the generic data object. * @return The data object. 
*/ GenericData& CHPMultiDataData::GetGenericData() { return genericData; } void CHPMultiDataData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } std::string CHPMultiDataData::GetFilename() const { return ((GenericData&)genericData).Header().GetFilename(); } int32_t CHPMultiDataData::GetMetricColumnLength(MultiDataType dataType, int col) { OpenMultiDataDataSet(dataType); return dataSetInfo[dataType].metricColumns[col].GetLength(); } int32_t CHPMultiDataData::GetNumMetricColumns(MultiDataType dataType) { OpenMultiDataDataSet(dataType); return (int32_t)dataSetInfo[dataType].metricColumns.size(); } wstring CHPMultiDataData::GetMetricColumnName(MultiDataType dataType, int colIndex) { OpenMultiDataDataSet(dataType); return dataSetInfo[dataType].metricColumns[colIndex].GetName(); } int32_t CHPMultiDataData::GetEntryCount(MultiDataType dataType) { DataSetHeader *h = GetDataSetHeader(dataType); return (h == NULL ? 0 : h->GetRowCnt()); } int CHPMultiDataData::GetMaxProbeSetName(MultiDataType dataType) { OpenMultiDataDataSet(dataType); map::iterator pos = dataSetInfo.find(dataType); if (pos != dataSetInfo.end()) { return dataSetInfo[dataType].maxName; } return 0; } int CHPMultiDataData::GetMaxSegmentId(MultiDataType dataType) { OpenMultiDataDataSet(dataType); map::iterator pos = dataSetInfo.find(dataType); if (pos != dataSetInfo.end()) { return dataSetInfo[dataType].maxName; } return 0; } int CHPMultiDataData::GetDataGroupIndex(MultiDataType dataType) { const std::wstring &name = dataTypeGroupNames[dataType]; int ng = genericData.Header().GetNumDataGroups(); for (int ig=0; ig &columns, const std::wstring &groupName) { DataSetInfo info; info.maxName = maxln; info.metricColumns = columns; info.entries = NULL; info.dataType = dataType; info.dataSetIndex = (int)dataSetInfo.size(); dataSetInfo[dataType] = info; DataSetHeader dsHdr; dsHdr.SetRowCnt(ln); dsHdr.SetName(MultiDataDataSetNames[(int)dataType]); AddColumns(info, dsHdr); if (groupName.empty() == 
true) dataTypeGroupNames[dataType] = MULTI_DATA_NAME; else dataTypeGroupNames[dataType] = groupName; DataGroupHeader* dgHdr = GetDataGroupHeader(dataTypeGroupNames[dataType]); dgHdr->AddDataSetHdr(dsHdr); } void CHPMultiDataData::SetEntryCount(MultiDataType dataType, int32_t ln, int32_t maxln, const std::wstring &groupName) { std::vector empty; SetEntryCount(dataType, ln, maxln, empty, groupName); } void CHPMultiDataData::SetEntryCount(MultiDataType dataType, int32_t ln, int segmentTypeMax, int referenceSegmentIDMax, int familialSegmentIDMax, const std::wstring &groupName) { DataSetInfo info; info.maxSegmentType = segmentTypeMax; info.maxReferenceSegmentID = referenceSegmentIDMax; info.maxFamilialSegmentID = familialSegmentIDMax; info.entries = NULL; info.dataType = dataType; info.dataSetIndex = (int)dataSetInfo.size(); dataSetInfo[dataType] = info; DataSetHeader dsHdr; dsHdr.SetRowCnt(ln); dsHdr.SetName(MultiDataDataSetNames[(int)dataType]); AddColumns(info, dsHdr); if (groupName.empty() == true) dataTypeGroupNames[dataType] = MULTI_DATA_NAME; else dataTypeGroupNames[dataType] = groupName; DataGroupHeader* dgHdr = GetDataGroupHeader(dataTypeGroupNames[dataType]); dgHdr->AddDataSetHdr(dsHdr); } void CHPMultiDataData::SetEntryCount(MultiDataType dataType, int32_t ln, int familialARRIDMax, int familialCHPIDMax, int familialCHPFileMax, int familialRoleMax, const std::wstring &groupName) { DataSetInfo info; info.maxFamilialARRID = familialARRIDMax; info.maxFamilialCHPID = familialCHPIDMax; info.maxFamilialCHPFile = familialCHPFileMax; info.maxFamilialRole = familialRoleMax; info.entries = NULL; info.dataType = dataType; info.dataSetIndex = (int)dataSetInfo.size(); dataSetInfo[dataType] = info; DataSetHeader dsHdr; dsHdr.SetRowCnt(ln); dsHdr.SetName(MultiDataDataSetNames[(int)dataType]); AddColumns(info, dsHdr); if (groupName.empty() == true) dataTypeGroupNames[dataType] = MULTI_DATA_NAME; else dataTypeGroupNames[dataType] = groupName; DataGroupHeader* dgHdr = 
GetDataGroupHeader(dataTypeGroupNames[dataType]); dgHdr->AddDataSetHdr(dsHdr); } void CHPMultiDataData::GetGenotypeEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry) { GetGenericEntry(dataType, index, entry); } void CHPMultiDataData::GetCopyNumberEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData &entry) { GetGenericEntry(dataType, index, entry); } void CHPMultiDataData::GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::DmetCopyNumberData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidence); ds->entries->GetData(index, colIndex++, entry.force); ds->entries->GetData(index, colIndex++, entry.estimate); ds->entries->GetData(index, colIndex++, entry.lower); ds->entries->GetData(index, colIndex++, entry.upper); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::DmetBiAllelicData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidence); ds->entries->GetData(index, colIndex++, entry.force); ds->entries->GetData(index, colIndex++, entry.signalA); ds->entries->GetData(index, colIndex++, entry.signalB); ds->entries->GetData(index, colIndex++, entry.contextA); ds->entries->GetData(index, colIndex++, entry.contextB); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetEntry(MultiDataType dataType, int index, 
affymetrix_calvin_data::DmetMultiAllelicData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidence); ds->entries->GetData(index, colIndex++, entry.force); ds->entries->GetData(index, colIndex++, entry.alleleCount); ds->entries->GetData(index, colIndex++, entry.signalA); ds->entries->GetData(index, colIndex++, entry.signalB); ds->entries->GetData(index, colIndex++, entry.signalC); ds->entries->GetData(index, colIndex++, entry.signalD); ds->entries->GetData(index, colIndex++, entry.signalE); ds->entries->GetData(index, colIndex++, entry.signalF); ds->entries->GetData(index, colIndex++, entry.contextA); ds->entries->GetData(index, colIndex++, entry.contextB); ds->entries->GetData(index, colIndex++, entry.contextC); ds->entries->GetData(index, colIndex++, entry.contextD); ds->entries->GetData(index, colIndex++, entry.contextE); ds->entries->GetData(index, colIndex++, entry.contextF); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::AllelePeaks &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.chr); ds->entries->GetData(index, colIndex++, entry.position); GetExtraMetricEntries(ds, index, colIndex, entry.peaks); } } void CHPMultiDataData::GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::MarkerABSignals &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; ds->entries->GetData(index, colIndex++, entry.index); 
GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::CytoGenotypeCallData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; ds->entries->GetData(index, colIndex++, entry.index); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidence); ds->entries->GetData(index, colIndex++, entry.forcedCall); ds->entries->GetData(index, colIndex++, entry.aSignal); ds->entries->GetData(index, colIndex++, entry.bSignal); ds->entries->GetData(index, colIndex++, entry.signalStrength); ds->entries->GetData(index, colIndex++, entry.contrast); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetCopyNumberEntryLog2Ratio(MultiDataType dataType, int index, float* val) { GetGenericCopyNumberEntryLog2Ratio(dataType, index, val); } void CHPMultiDataData::GetCytoEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData &entry) { GetGenericEntry(dataType, index, entry); } void CHPMultiDataData::GetExpressionEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry) { GetGenericEntry(dataType, index, entry); } void CHPMultiDataData::GetCopyNumberVariationEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry) { GetGenericEntry(dataType, index, entry); } void CHPMultiDataData::GetChromosomeSegmentEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeSegmentData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; ds->entries->GetData(index, colIndex++, entry.segmentId); ds->entries->GetData(index, colIndex++, entry.chr); ds->entries->GetData(index, colIndex++, entry.startPosition); 
ds->entries->GetData(index, colIndex++, entry.stopPosition); ds->entries->GetData(index, colIndex++, entry.markerCount); ds->entries->GetData(index, colIndex++, entry.meanMarkerDistance); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetChromosomeSegmentEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeSegmentDataEx &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; ds->entries->GetData(index, colIndex++, entry.segmentId); ds->entries->GetData(index, colIndex++, entry.referenceSampleKey); ds->entries->GetData(index, colIndex++, entry.familialSampleKey); ds->entries->GetData(index, colIndex++, entry.chr); ds->entries->GetData(index, colIndex++, entry.startPosition); ds->entries->GetData(index, colIndex++, entry.stopPosition); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidence); ds->entries->GetData(index, colIndex++, entry.markerCount); ds->entries->GetData(index, colIndex++, entry.homozygosity); ds->entries->GetData(index, colIndex++, entry.heterozygosity); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetChromosomeSummaryEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeMultiDataSummaryData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; ds->entries->GetData(index, colIndex++, entry.chr); ds->entries->GetData(index, colIndex++, entry.display); ds->entries->GetData(index, colIndex++, entry.startIndex); ds->entries->GetData(index, colIndex++, entry.markerCount); ds->entries->GetData(index, colIndex++, entry.minSignal); ds->entries->GetData(index, colIndex++, entry.maxSignal); ds->entries->GetData(index, colIndex++, entry.medianCnState); ds->entries->GetData(index, colIndex++, entry.homFrequency); ds->entries->GetData(index, 
colIndex++, entry.hetFrequency); GetExtraMetricEntries(ds, index, colIndex++, entry.metrics); } } void CHPMultiDataData::GetFamilialSampleEntry(MultiDataType dataType, int index, affymetrix_calvin_data::FamilialSample &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; ds->entries->GetData(index, colIndex++, entry.sampleKey); ds->entries->GetData(index, colIndex++, entry.arrID); ds->entries->GetData(index, colIndex++, entry.chpID); ds->entries->GetData(index, colIndex++, entry.chpFilename); ds->entries->GetData(index, colIndex++, entry.role); u_int8_t rv; ds->entries->GetData(index, colIndex++, rv); entry.roleValidity = (rv == 1 ? true : false); ds->entries->GetData(index, colIndex++, entry.roleConfidence); } } void CHPMultiDataData::GetFamilialSegmentOverlapEntry(MultiDataType dataType, int index, affymetrix_calvin_data::FamilialSegmentOverlap &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; ds->entries->GetData(index, colIndex++, entry.segmentType); ds->entries->GetData(index, colIndex++, entry.referenceSampleKey); ds->entries->GetData(index, colIndex++, entry.referenceSegmentID); ds->entries->GetData(index, colIndex++, entry.familialSampleKey); ds->entries->GetData(index, colIndex++, entry.familialSegmentID); } } void CHPMultiDataData::GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidence); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetGenericEntry(MultiDataType dataType, int index, 
affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.chr); ds->entries->GetData(index, colIndex++, entry.position); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetGenericCopyNumberEntryLog2Ratio(MultiDataType dataType, int index, float* val) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { GetExtraCopyNumberFloatTypeNoNameLog2Ratio(ds, index, val); } } void CHPMultiDataData::GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.chr); ds->entries->GetData(index, colIndex++, entry.startPosition); ds->entries->GetData(index, colIndex++, entry.stopPosition); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidenceScore); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry) { DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.quantification); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry) { 
DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { int colIndex = 0; entry.name.clear(); ds->entries->GetData(index, colIndex++, entry.name); ds->entries->GetData(index, colIndex++, entry.signal); ds->entries->GetData(index, colIndex++, entry.call); ds->entries->GetData(index, colIndex++, entry.confidenceScore); GetExtraMetricEntries(ds, index, colIndex, entry.metrics); } } void CHPMultiDataData::GetExtraMetricEntries(DataSetInfo *ds, int rowIndex, int colIndex, std::vector &metrics) { int32_t ncols = (int32_t) ds->metricColumns.size(); metrics.resize(ncols); for (int32_t icol=0; icolmetricColumns[icol].GetName()); switch (ds->metricColumns[icol].GetColumnType()) { case ByteColType: { int8_t val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueInt8(val); } break; case UByteColType: { u_int8_t val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueUInt8(val); } break; case ShortColType: { int16_t val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueInt16(val); } break; case UShortColType: { u_int16_t val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueUInt16(val); } break; case IntColType: { int32_t val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueInt32(val); } break; case UIntColType: { u_int32_t val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueUInt32(val); } break; case FloatColType: { float val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueFloat(val); } break; case ASCIICharColType: { string val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueAscii(val); } break; case UnicodeCharColType: { wstring val; ds->entries->GetData(rowIndex, colIndex++, val); metrics[icol].SetValueText(val); } break; } } } void CHPMultiDataData::GetExtraCopyNumberFloatTypeNoNameLog2Ratio(DataSetInfo *ds, int rowIndex, float *val) { float valFloat = 
0.0f; ds->entries->GetData(rowIndex, cnlog2RatioIndexOffset, (float&)valFloat); *val = valFloat; } u_int8_t CHPMultiDataData::GetGenoCall(MultiDataType dataType, int index) { u_int8_t call = 0; DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) ds->entries->GetData(index, 1, call); return call; } float CHPMultiDataData::GetGenoConfidence(MultiDataType dataType, int index) { float conf = 0.0f; DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) ds->entries->GetData(index, 2, conf); return conf; } float CHPMultiDataData::GetExpressionQuantification(MultiDataType dataType, int index) { float quant = 0.0f; DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { ds->entries->GetData(index, 1, quant); } return quant; } std::string CHPMultiDataData::GetProbeSetName(MultiDataType dataType, int index) { std::string name; DataSetInfo *ds = OpenMultiDataDataSet(dataType); if (ds && ds->entries && ds->entries->IsOpen()) { ds->entries->GetData(index, 0, name); } return name; } void CHPMultiDataData::AddColumns(DataSetInfo &info, DataSetHeader& hdr) { switch (info.dataType) { case ExpressionMultiDataType: hdr.AddAsciiColumn(PROBE_SET_NAME, info.maxName); hdr.AddFloatColumn(QUANTIFICATION); break; case GenotypeMultiDataType: hdr.AddAsciiColumn(PROBE_SET_NAME, info.maxName); hdr.AddUByteColumn(CALL); hdr.AddFloatColumn(CONFIDENCE); break; case CopyNumberMultiDataType: hdr.AddAsciiColumn(PROBE_SET_NAME, info.maxName); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(POSITION); break; case CytoMultiDataType: hdr.AddAsciiColumn(REGION, info.maxName); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddUByteColumn(CALL); hdr.AddFloatColumn(CONFIDENCE); break; case CopyNumberVariationMultiDataType: hdr.AddAsciiColumn(REGION, info.maxName); hdr.AddFloatColumn(SIGNAL); hdr.AddUByteColumn(CALL); 
hdr.AddFloatColumn(CONFIDENCE); break; case DmetCopyNumberMultiDataType: hdr.AddAsciiColumn(PROBE_SET_NAME, info.maxName); hdr.AddShortColumn(CN_CALL); hdr.AddFloatColumn(CN_CONFIDENCE); hdr.AddShortColumn(CN_FORCE); hdr.AddFloatColumn(CN_ESTIMATE); hdr.AddFloatColumn(CN_LOWER); hdr.AddFloatColumn(CN_UPPER); break; case DmetMultiAllelicMultiDataType: hdr.AddAsciiColumn(PROBE_SET_NAME, info.maxName); hdr.AddUByteColumn(CALL); hdr.AddFloatColumn(CONFIDENCE); hdr.AddUByteColumn(FORCE); hdr.AddUByteColumn(ALLELE_COUNT); hdr.AddFloatColumn(SIGNAL_A); hdr.AddFloatColumn(SIGNAL_B); hdr.AddFloatColumn(SIGNAL_C); hdr.AddFloatColumn(SIGNAL_D); hdr.AddFloatColumn(SIGNAL_E); hdr.AddFloatColumn(SIGNAL_F); hdr.AddUByteColumn(CONTEXT_A); hdr.AddUByteColumn(CONTEXT_B); hdr.AddUByteColumn(CONTEXT_C); hdr.AddUByteColumn(CONTEXT_D); hdr.AddUByteColumn(CONTEXT_E); hdr.AddUByteColumn(CONTEXT_F); break; case DmetBiAllelicMultiDataType: hdr.AddAsciiColumn(PROBE_SET_NAME, info.maxName); hdr.AddUByteColumn(CALL); hdr.AddFloatColumn(CONFIDENCE); hdr.AddUByteColumn(FORCE); hdr.AddFloatColumn(SIGNAL_A); hdr.AddFloatColumn(SIGNAL_B); hdr.AddUByteColumn(CONTEXT_A); hdr.AddUByteColumn(CONTEXT_B); break; case ChromosomeSummaryMultiDataType: hdr.AddUByteColumn(CHR); hdr.AddAsciiColumn(CHR_DISPLAY, info.maxName); hdr.AddUIntColumn(START_INDEX); hdr.AddUIntColumn(MARKER_COUNT); hdr.AddFloatColumn(MIN_SIGNAL); hdr.AddFloatColumn(MAX_SIGNAL); hdr.AddFloatColumn(MEDIAN_CN_STATE); hdr.AddFloatColumn(HOM_FREQ); hdr.AddFloatColumn(HET_FREQ); break; case SegmentCNMultiDataType: hdr.AddUIntColumn(SEGMENT_ID); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddIntColumn(MARKER_COUNT); hdr.AddUIntColumn(MEAN_MARKER_DISTANCE); break; case SegmentLOHMultiDataType: hdr.AddUIntColumn(SEGMENT_ID); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddIntColumn(MARKER_COUNT); hdr.AddUIntColumn(MEAN_MARKER_DISTANCE); 
break; case SegmentCNNeutralLOHMultiDataType: hdr.AddUIntColumn(SEGMENT_ID); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddIntColumn(MARKER_COUNT); hdr.AddUIntColumn(MEAN_MARKER_DISTANCE); break; case SegmentNormalDiploidMultiDataType: hdr.AddUIntColumn(SEGMENT_ID); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddIntColumn(MARKER_COUNT); hdr.AddUIntColumn(MEAN_MARKER_DISTANCE); break; case SegmentNoCallMultiDataType: hdr.AddUIntColumn(SEGMENT_ID); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddIntColumn(MARKER_COUNT); hdr.AddUIntColumn(MEAN_MARKER_DISTANCE); break; case SegmentMosaicismMultiDataType: hdr.AddUIntColumn(SEGMENT_ID); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddIntColumn(MARKER_COUNT); hdr.AddUIntColumn(MEAN_MARKER_DISTANCE); break; case SegmentGenotypeConcordanceMultiDataType: case SegmentGenotypeDiscordanceMultiDataType: case SegmentCNLossLOHConcordanceMultiDataType: case SegmentCNNeutralLOHConcordanceMultiDataType: case SegmentHeteroUPDMultiDataType: case SegmentIsoUPDMultiDataType: case SegmentDenovoCopyNumberMultiDataType: case SegmentHemizygousParentOfOriginMultiDataType: hdr.AddUIntColumn(SEGMENT_ID); hdr.AddUIntColumn(REFERENCE_SAMPLE_KEY); hdr.AddUIntColumn(FAMILIAL_SAMPLE_KEY); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(START_POSITION); hdr.AddUIntColumn(STOP_POSITION); hdr.AddUByteColumn(CALL); hdr.AddFloatColumn(CONFIDENCE); hdr.AddIntColumn(MARKER_COUNT); hdr.AddFloatColumn(HOMOZYGOSITY); hdr.AddFloatColumn(HETEROZYGOSITY); break; case FamilialSegmentOverlapsMultiDataType: hdr.AddAsciiColumn(SEGMENT_TYPE, info.maxSegmentType); hdr.AddUIntColumn(REFERENCE_SAMPLE_KEY); hdr.AddAsciiColumn(REFERENCE_SEGMENT_ID, info.maxReferenceSegmentID); hdr.AddUIntColumn(FAMILIAL_SAMPLE_KEY); hdr.AddAsciiColumn(FAMILIAL_SEGMENT_ID, 
info.maxFamilialSegmentID); break; case FamilialSamplesMultiDataType: hdr.AddUIntColumn(SAMPLE_KEY); hdr.AddAsciiColumn(ARRID, info.maxFamilialARRID); hdr.AddAsciiColumn(CHPID, info.maxFamilialCHPID); hdr.AddUnicodeColumn(CHP_FILENAME, info.maxFamilialCHPFile); hdr.AddAsciiColumn(ROLE, info.maxFamilialRole); hdr.AddUByteColumn(ROLE_VALIDITY); hdr.AddFloatColumn(ROLE_CONFIDENCE); break; case AllelePeaksMultiDataType: hdr.AddAsciiColumn(PROBE_SET_NAME, info.maxName); hdr.AddUByteColumn(CHR); hdr.AddUIntColumn(POSITION); break; case MarkerABSignalsMultiDataType: hdr.AddUIntColumn(PROBE_SET_INDEX); /*hdr.AddFloatColumn(A_SIGNAL); hdr.AddFloatColumn(B_SIGNAL); hdr.AddFloatColumn(SCAR);*/ break; case CytoGenotypeCallMultiDataType: hdr.AddUIntColumn(PROBE_SET_INDEX); hdr.AddByteColumn(CALL); hdr.AddFloatColumn(CONFIDENCE); hdr.AddByteColumn(FORCE); hdr.AddFloatColumn(A_SIGNAL); hdr.AddFloatColumn(B_SIGNAL); hdr.AddFloatColumn(SIGNAL_STRENGTH); hdr.AddFloatColumn(CONTRAST); break; default: break; } for (vector::iterator it=info.metricColumns.begin(); it!=info.metricColumns.end(); ++it) { hdr.AddColumn(*it); } } DataSetInfo *CHPMultiDataData::OpenMultiDataDataSet(MultiDataType dataType) { map::iterator pos = dataSetInfo.find(dataType); if (pos != dataSetInfo.end()) { return &dataSetInfo[dataType]; } //read metrics column data DataSetInfo info; if (dataTypeGroupNames.empty() == true) { std::map nameTypeMap; int nnames = sizeof(MultiDataDataSetNames) / sizeof(std::wstring); for (int iname=0; inameOpen(); int32_t ncols = info.entries->Header().GetColumnCnt(); info.metricColumns.clear(); int startCol = 0; if (dataType == ExpressionMultiDataType || dataType == ExpressionControlMultiDataType) { startCol = 2; } else if (dataType == GenotypeMultiDataType || dataType == GenotypeControlMultiDataType) { startCol = 3; } else if (dataType == CopyNumberMultiDataType) { startCol = 3; } else if (dataType == CytoMultiDataType) { startCol = 6; } else if (dataType == 
CopyNumberVariationMultiDataType) { startCol = 4; } else if (dataType == DmetCopyNumberMultiDataType) { startCol = 7; } else if (dataType == DmetMultiAllelicMultiDataType) { startCol = 17; } else if (dataType == DmetBiAllelicMultiDataType) { startCol = 8; } else if (dataType == ChromosomeSummaryMultiDataType) { startCol = 9; } else if (dataType == SegmentCNMultiDataType || dataType == SegmentLOHMultiDataType || dataType == SegmentCNNeutralLOHMultiDataType || dataType == SegmentNormalDiploidMultiDataType || dataType == SegmentNoCallMultiDataType || dataType == SegmentMosaicismMultiDataType) { startCol = 6; } else if (dataType == SegmentGenotypeConcordanceMultiDataType || dataType == SegmentGenotypeDiscordanceMultiDataType || dataType == SegmentCNLossLOHConcordanceMultiDataType || dataType == SegmentCNNeutralLOHConcordanceMultiDataType || dataType == SegmentHeteroUPDMultiDataType || dataType == SegmentIsoUPDMultiDataType || dataType == SegmentDenovoCopyNumberMultiDataType || dataType == SegmentHemizygousParentOfOriginMultiDataType) { startCol = 11; } else if (dataType == FamilialSegmentOverlapsMultiDataType) { startCol = 5; } else if (dataType == FamilialSamplesMultiDataType) { startCol = 7; } else if (dataType == AllelePeaksMultiDataType) { startCol = 3; } else if (dataType == MarkerABSignalsMultiDataType) { startCol = 1; } else if (dataType == CytoGenotypeCallMultiDataType) { startCol = 8; } for (int32_t icol=startCol; icolHeader().GetColumnInfo(icol)); } dataSetInfo[dataType] = info; return &dataSetInfo[dataType]; } return NULL; } std::wstring CHPMultiDataData::GetArrayType() { return GetWStringFromGenericHdr(ARRAY_TYPE_PARAM_NAME); } void CHPMultiDataData::SetArrayType(const std::wstring& value) { SetWStringToGenericHdr(ARRAY_TYPE_PARAM_NAME, value, ARRAY_TYPE_MAX_LEN); } std::wstring CHPMultiDataData::GetAlgName() { return GetWStringFromGenericHdr(ALGORITHM_NAME_PARAM_NAME); } void CHPMultiDataData::SetAlgName(const std::wstring& value) { 
SetWStringToGenericHdr(ALGORITHM_NAME_PARAM_NAME, value); } std::wstring CHPMultiDataData::GetAlgVersion() { return GetWStringFromGenericHdr(ALG_VERSION_PARAM_NAME); } void CHPMultiDataData::SetAlgVersion(const std::wstring& value) { SetWStringToGenericHdr(ALG_VERSION_PARAM_NAME, value); } ParameterNameValueTypeList CHPMultiDataData::GetAlgParams() { ParameterNameValueTypeList nvt; ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, ALGORITHM_PARAM_NAME_PREFIX_S.size(),ALGORITHM_PARAM_NAME_PREFIX_S) == 0) { param = *begin; key.erase(0, ALGORITHM_PARAM_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPMultiDataData::AddAlgParams(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); it++) { param = *it; param.SetName(ALGORITHM_PARAM_NAME_PREFIX_S + param.GetName()); hdr->AddNameValParam(param); } } void CHPMultiDataData::AddAppMetaInfo(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); ++it) { param = *it; param.SetName(APPLICATION_META_INFO_PREFIX_S + param.GetName()); hdr->AddNameValParam(param); } } ParameterNameValueTypeList CHPMultiDataData::GetSummaryParams() { ParameterNameValueTypeList nvt; ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size(),CHIP_SUMMARY_PARAMETER_NAME_PREFIX) == 0) { param = 
*begin; key.erase(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPMultiDataData::AddSummaryParams(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); it++) { param = *it; param.SetName(CHIP_SUMMARY_PARAMETER_NAME_PREFIX + param.GetName()); hdr->AddNameValParam(param); } } std::wstring CHPMultiDataData::GetWStringFromGenericHdr(const std::wstring& name) { std::wstring result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueText(); } return result; } void CHPMultiDataData::SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueText(value, reserve); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } affxparser/src/fusion/calvin_files/data/src/CHPMultiDataData.h0000644000175200017520000004672114516003651025362 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPMultiDataData_HEADER_ #define _CHPMultiDataData_HEADER_ #include "calvin_files/data/src/AllelePeaks.h" #include "calvin_files/data/src/ChromosomeMultiDataSummaryData.h" #include "calvin_files/data/src/ChromosomeSegmentMultiDataData.h" #include "calvin_files/data/src/ColumnInfo.h" #include "calvin_files/data/src/FamilialMultiDataData.h" #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/data/src/MarkerABSignals.h" #include "calvin_files/data/src/ProbeSetMultiDataData.h" #include "calvin_files/data/src/CytoGenotypeCallMultiDataData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // namespace affymetrix_calvin_io { /*! The identifier to identify a multi-data CHP file. */ #define CHP_MULTI_DATA_TYPE "affymetrix-multi-data-type-analysis" /*! Enumerates the types of data that can be stored in the file. MultiDataDataSetNames and MultiDataDataTypes list their entries in this same order.
*/ typedef enum MultiDataType { ExpressionMultiDataType, ExpressionControlMultiDataType, GenotypeMultiDataType, GenotypeControlMultiDataType, CopyNumberMultiDataType, CytoMultiDataType, CopyNumberVariationMultiDataType, DmetCopyNumberMultiDataType, DmetMultiAllelicMultiDataType, DmetBiAllelicMultiDataType, ChromosomeSummaryMultiDataType, SegmentCNMultiDataType, SegmentLOHMultiDataType, SegmentCNNeutralLOHMultiDataType, SegmentNormalDiploidMultiDataType, SegmentMosaicismMultiDataType, SegmentNoCallMultiDataType, FamilialSegmentOverlapsMultiDataType, FamilialSamplesMultiDataType, SegmentGenotypeConcordanceMultiDataType, SegmentGenotypeDiscordanceMultiDataType, SegmentCNLossLOHConcordanceMultiDataType, SegmentCNNeutralLOHConcordanceMultiDataType, SegmentHeteroUPDMultiDataType, SegmentIsoUPDMultiDataType, SegmentDenovoCopyNumberMultiDataType, SegmentHemizygousParentOfOriginMultiDataType, AllelePeaksMultiDataType, MarkerABSignalsMultiDataType, CytoGenotypeCallMultiDataType } MultiDataType_t; /*! The data set names, one per MultiDataType enumerator, listed in enumerator order. */ const static std::wstring MultiDataDataSetNames[] = { L"Expression", L"ExpressionControl", L"Genotype", L"GenotypeControl", L"CopyNumber", L"Cyto", L"CopyNumberVariation", L"DmetCopyNumber", L"DmetMultiAllelic", L"DmetBiAllelic", L"Summary", L"CN", L"LOH", L"CNNeutralLOH", L"NormalDiploid", L"Mosaicism", L"NoCall", L"SegmentOverlaps", L"Samples", L"GenotypeConcordance", L"GenotypeDiscordance", L"CNLossLOHConcordance", L"CNNeutralLOHConcordance", L"HeteroUPD", L"IsoUPD", L"DenovoCopyNumber", L"HemizygousParentOfOrigin", L"AllelePeaks", L"MarkerABSignal", L"Calls" }; /*! The data types, listed in the same order as MultiDataDataSetNames.
*/ const static MultiDataType MultiDataDataTypes[] = { ExpressionMultiDataType, ExpressionControlMultiDataType, GenotypeMultiDataType, GenotypeControlMultiDataType, CopyNumberMultiDataType, CytoMultiDataType, CopyNumberVariationMultiDataType, DmetCopyNumberMultiDataType, DmetMultiAllelicMultiDataType, DmetBiAllelicMultiDataType, ChromosomeSummaryMultiDataType, SegmentCNMultiDataType, SegmentLOHMultiDataType, SegmentCNNeutralLOHMultiDataType, SegmentNormalDiploidMultiDataType, SegmentMosaicismMultiDataType, SegmentNoCallMultiDataType, FamilialSegmentOverlapsMultiDataType, FamilialSamplesMultiDataType, SegmentGenotypeConcordanceMultiDataType, SegmentGenotypeDiscordanceMultiDataType, SegmentCNLossLOHConcordanceMultiDataType, SegmentCNNeutralLOHConcordanceMultiDataType, SegmentHeteroUPDMultiDataType, SegmentIsoUPDMultiDataType, SegmentDenovoCopyNumberMultiDataType, SegmentHemizygousParentOfOriginMultiDataType, AllelePeaksMultiDataType, MarkerABSignalsMultiDataType, CytoGenotypeCallMultiDataType }; /*! Holds data set information. */ class DataSetInfo { public: /*! The data type. */ MultiDataType dataType; /*! The CHP data set. */ DataSet* entries; /*! The maximum length of the name column. */ int maxName; /*! The maximum length of the familial seg type column. */ int maxSegmentType; /*! The maximum length of the familial ref seg id column. */ int maxReferenceSegmentID; /*! The maximum length of the familial seg id column. */ int maxFamilialSegmentID; /*! The maximum length of the familial sample ARR id */ int maxFamilialARRID; /*! The maximum length of the familial sample CHP id */ int maxFamilialCHPID; /*! The maximum length of the familial sample CHP file name */ int maxFamilialCHPFile; /*! The maximum length of the familial sample role */ int maxFamilialRole; /*! The data set index. */ int dataSetIndex; /*! An array of extra metric columns. */ std::vector metricColumns; /*! Constructor. */ DataSetInfo(); }; /*! Holds data associated with genotype or expression CHP files.
*/ class CHPMultiDataData { public: /*! Constructor */ CHPMultiDataData(); /*! Constructor with file name. * @param filename The name of the CHP file. * @param groupNames A list of names for the data groups. */ CHPMultiDataData(const std::string& filename, const std::list *groupNames = NULL); /*! Destructor */ ~CHPMultiDataData(); private: /*! The generic data item. */ GenericData genericData; /*! chp data sets */ std::map dataSetInfo; /*! data groups */ std::map dataTypeGroupNames; public: /*! Gets the group name for a data type. * @param dataType The data type * @return The group name */ std::wstring GetGroupName(MultiDataType dataType); /*! The data set information */ std::map &GetDataSetInfo(); /*! The maximum length of a probe set name. * @param dataType The data type * @return The maximum probe set name length */ int GetMaxProbeSetName(MultiDataType dataType); /*! The maximum length of a segment id. * @param dataType The data type * @return The maximum length */ int GetMaxSegmentId(MultiDataType dataType); /*! Clears the members. */ void Clear(); /*! Sets the file name. * @param p The name of the CHP file */ void SetFilename(const std::string &p); /*! Gets the file name. * @return The file name. */ std::string GetFilename() const; /*! Gets the array type. * @return The array type. */ std::wstring GetArrayType(); /*! Sets the array type. * @param value The array type. */ void SetArrayType(const std::wstring& value); /*! Sets the number of entries (probe sets). * @param dataType The data type * @param ln The number of probe sets. * @param maxln The maximum length of a probe set name. * @param columns An array of metric columns * @param groupName The group name */ void SetEntryCount(MultiDataType dataType, int32_t ln, int32_t maxln, const std::vector &columns, const std::wstring &groupName = L""); /*! Sets the number of entries (probe sets). * @param dataType The data type * @param ln The number of probe sets. * @param maxln The maximum length of a probe set name. * @param groupName The group name */ void SetEntryCount(MultiDataType dataType, int32_t ln, int32_t maxln, const std::wstring &groupName = L""); /*!
Sets the number of entries specifically for the familial file. * @param dataType The data type * @param ln The number of probe sets. * @param segmentTypeMax The maximum length of the familial seg type column * @param referenceSegmentIDMax The maximum length of the familial ref seg id column * @param familialSegmentIDMax The maximum length of the familial seg id column * @param groupName The group name */ void SetEntryCount(MultiDataType dataType, int32_t ln, int segmentTypeMax, int referenceSegmentIDMax, int familialSegmentIDMax, const std::wstring &groupName = L""); /*! Sets the number of entries specifically for the familial file. * @param dataType The data type * @param ln The number of probe sets. * @param familialARRIDMax The maximum length of the familial sample ARR id. * @param familialCHPIDMax The maximum length of the familial sample CHP id. * @param familialCHPFileMax The maximum length of the familial sample CHP file name. * @param familialRoleMax The maximum length of the familial sample role * @param groupName The group name */ void SetEntryCount(MultiDataType dataType, int32_t ln, int familialARRIDMax, int familialCHPIDMax, int familialCHPFileMax, int familialRoleMax, const std::wstring &groupName = L""); /*! Gets the number of entries (probe sets) * @param dataType The data type * @return The number of entries. */ int32_t GetEntryCount(MultiDataType dataType); /*! Gets the name of the algorithm. * @return The algorithm name. */ std::wstring GetAlgName(); /*! Sets the name of the algorithm. * @param value The algorithm name. */ void SetAlgName(const std::wstring& value); /*! Gets the algorithm version. * @return The version. */ std::wstring GetAlgVersion(); /*! Sets the algorithm version. * @param value The version. */ void SetAlgVersion(const std::wstring& value); /*! Gets the algorithm parameters * @return The algorithm parameters. */ ParameterNameValueTypeList GetAlgParams(); /*!
Adds the algorithm parameters. * @param params The algorithm parameters. */ void AddAlgParams(const ParameterNameValueTypeList& params); /*! Adds the application meta data information * @param params The application meta data */ void AddAppMetaInfo(const ParameterNameValueTypeList& params); /*! Gets the summary parameters * @return The summary parameters. */ ParameterNameValueTypeList GetSummaryParams(); /*! Adds the summary parameters. * @param params The summary parameters. */ void AddSummaryParams(const ParameterNameValueTypeList& params); /*! Gets the file header. * @return The file header. */ FileHeader* GetFileHeader(); /*! Gets the generic data object. * @return The data object. */ GenericData& GetGenericData(); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The genotype results. */ void GetGenotypeEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetCopyNumberEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The DMET copy number results. */ void GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::DmetCopyNumberData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The DMET multi-allelic results. */ void GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::DmetMultiAllelicData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The DMET bi-allelic results. */ void GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::DmetBiAllelicData &entry); /*! Gets the allele peak data.
* @param dataType The data type * @param index The row index. * @param entry The results. */ void GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::AllelePeaks &entry); /*! Gets the marker AB signal data. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::MarkerABSignals &entry); /*! Gets the genotype data for cyto. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetEntry(MultiDataType dataType, int index, affymetrix_calvin_data::CytoGenotypeCallData &entry); /*! Gets the probe set data (log2Ratio only). * @param dataType The data type * @param index The row index. * @param val The copy number result (log2Ratio). */ void GetCopyNumberEntryLog2Ratio(MultiDataType dataType, int index, float *val); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The cyto results. */ void GetCytoEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The expression results. */ void GetExpressionEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry); /*! Gets the call of the probe set. * @param dataType The data type * @param index The row index. * @return The call. */ u_int8_t GetGenoCall(MultiDataType dataType, int index); /*! Gets the confidence in the call of the probe set. * @param dataType The data type * @param index The row index. * @return The confidence. */ float GetGenoConfidence(MultiDataType dataType, int index); /*! Gets the quantification of the probe set. * @param dataType The data type * @param index The row index. * @return The quantification. */ float GetExpressionQuantification(MultiDataType dataType, int index); /*! Gets the probe set data.
* @param dataType The data type * @param index The row index. * @param entry The copy number variation results. */ void GetCopyNumberVariationEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry); /*! Gets the chromosome segment data. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetChromosomeSegmentEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeSegmentData &entry); /*! Gets the chromosome segment data. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetChromosomeSegmentEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeSegmentDataEx &entry); /*! Gets the chromosome summary data. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetChromosomeSummaryEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeMultiDataSummaryData &entry); /*! Gets the familial file entry. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetFamilialSegmentOverlapEntry(MultiDataType dataType, int index, affymetrix_calvin_data::FamilialSegmentOverlap &entry); /*! Gets the familial file entry. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetFamilialSampleEntry(MultiDataType dataType, int index, affymetrix_calvin_data::FamilialSample &entry); /*! Get the probe set name. * @param dataType The data type * @param index The row index. * @return The probe set name. */ std::string GetProbeSetName(MultiDataType dataType, int index); /*! Returns the data set header. * @param dataType The data type. */ DataSetHeader *GetDataSetHeader(MultiDataType dataType); /*! Returns the data group header. * @param name The name of the group. */ DataGroupHeader *GetDataGroupHeader(const std::wstring &name); /*!
Returns the data group index. * @param dataType The data type. */ int GetDataGroupIndex(MultiDataType dataType); /*! Get the length of the metric columns. * @param dataType The data type * @param col The column index (of the metric columns) * @return The length. */ int32_t GetMetricColumnLength(MultiDataType dataType, int col); /*! Get the number of metric columns. * @param dataType The data type * @return The number of columns. */ int32_t GetNumMetricColumns(MultiDataType dataType); /*! Get the metric column name. * @param dataType The data type * @param colIndex the metric column index * @return The column name */ std::wstring GetMetricColumnName(MultiDataType dataType, int colIndex); private: /*! Get the extra metric columns. * @param ds The data set info. * @param rowIndex The row index. * @param colIndex The column index * @param metrics The results. */ void GetExtraMetricEntries(DataSetInfo *ds, int rowIndex, int colIndex, std::vector &metrics); /*! Get the log2Ratio value of a row. * @param ds The data set info. * @param rowIndex The row index. * @param val The result (log2Ratio). */ void GetExtraCopyNumberFloatTypeNoNameLog2Ratio(DataSetInfo *ds, int rowIndex, float *val); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The genotype results. */ void GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData &entry); /*! Gets the probe set data (log2Ratio only). * @param dataType The data type * @param index The row index. * @param val The copy number result (log2Ratio). */ void GetGenericCopyNumberEntryLog2Ratio(MultiDataType dataType, int index, float *val); /*!
Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The cyto region results. */ void GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The expression results. */ void GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry); /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number variation region results. */ void GetGenericEntry(MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry); /*! Opens a group for reading. * @param dataType The data type * @return The data set information, or NULL if the data set could not be opened. */ DataSetInfo *OpenMultiDataDataSet(MultiDataType dataType); /*! Gets a parameter value as a string. * @param name The name of the parameter. * @return The string representation. */ std::wstring GetWStringFromGenericHdr(const std::wstring& name); /*! Adds a parameter name/value pair to the parameters section. * @param name The name of the parameter. * @param value The value of the parameter. * @param reserve The length of the string to reserve. */ void SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve=-1); /*! Adds the columns to the data set. * @param info The data info * @param hdr The data set header. */ void AddColumns(DataSetInfo &info, DataSetHeader& hdr); }; } #endif affxparser/src/fusion/calvin_files/data/src/CHPQuantificationData.cpp0000644000175200017520000001776014516003651027010 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPQuantificationData.h" // #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/utils/src/StringUtils.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; /*! The group and data set and column name to store the quantification data. */ const static std::wstring QUANTIFICATION_QUANTIFICATION_NAME = L"Quantification"; /*! The column name for the probe set name. */ const static std::wstring QUANTIFICATION_PROBE_SET_NAME = L"ProbeSetName"; /*! The column name for the probe set id. 
*/ const static std::wstring QUANTIFICATION_PROBE_SET_ID = L"ProbeSetId"; CHPQuantificationData::CHPQuantificationData() { maxProbeSetName = -1; firstColumnType = UnicodeCharColType; entries = NULL; Clear(); } CHPQuantificationData::CHPQuantificationData(const std::string& filename) { maxProbeSetName = -1; firstColumnType = UnicodeCharColType; entries = NULL; Clear(); SetFilename(filename); DataGroupHeader dcHdr(QUANTIFICATION_QUANTIFICATION_NAME); genericData.Header().AddDataGroupHdr(dcHdr); genericData.Header().GetGenericDataHdr()->SetFileTypeId(CHP_QUANTIFICATION_TYPE); } CHPQuantificationData::~CHPQuantificationData() { Clear(); } DataSetHeader &CHPQuantificationData::GetDataSetHeader() { return genericData.Header().GetDataGroup(0).GetDataSet(0); } void CHPQuantificationData::Clear() { if (entries){ entries->Delete(); entries = 0; } genericData.Header().Clear(); } void CHPQuantificationData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } std::string CHPQuantificationData::GetFilename() const { return ((GenericData&)genericData).Header().GetFilename(); } int32_t CHPQuantificationData::GetEntryCount() { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); DataSetHeader dpHdr = dcHdr->GetDataSet(0); return dpHdr.GetRowCnt(); } void CHPQuantificationData::SetEntryCount(int32_t ln, int32_t maxln) { firstColumnType = ASCIICharColType; maxProbeSetName = maxln; ParameterNameValueType param; DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(QUANTIFICATION_QUANTIFICATION_NAME); AddColumns(dpHdr, false); dcHdr->AddDataSetHdr(dpHdr); } void CHPQuantificationData::SetEntryCount(int32_t ln) { firstColumnType = IntColType; maxProbeSetName = -1; ParameterNameValueType param; DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(QUANTIFICATION_QUANTIFICATION_NAME); AddColumns(dpHdr, true); 
dcHdr->AddDataSetHdr(dpHdr); } void CHPQuantificationData::GetQuantificationEntry(int index, affymetrix_calvin_data::ProbeSetQuantificationData &entry) { OpenQuantificationDataSet(); if (entries && entries->IsOpen()) { entry.id = -1; entry.name.clear(); if (firstColumnType == ASCIICharColType) entries->GetData(index, 0, entry.name); else if (firstColumnType == UnicodeCharColType) { std::wstring wprobeSetName; entries->GetData(index, 0, wprobeSetName); entry.name = StringUtils::ConvertWCSToMBS(wprobeSetName); } else if (firstColumnType == IntColType) entries->GetData(index, 0, entry.id); entries->GetData(index, 1, entry.quantification); } } void CHPQuantificationData::AddColumns(DataSetHeader& hdr, bool keyIsID) { if (keyIsID == false) hdr.AddAsciiColumn(QUANTIFICATION_PROBE_SET_NAME, maxProbeSetName); else hdr.AddIntColumn(QUANTIFICATION_PROBE_SET_ID); hdr.AddFloatColumn(QUANTIFICATION_QUANTIFICATION_NAME); } void CHPQuantificationData::OpenQuantificationDataSet() { if (entries == NULL) { entries = genericData.DataSet(0, 0); if (entries) { entries->Open(); firstColumnType = entries->Header().GetColumnInfo(0).GetColumnType(); } } } std::wstring CHPQuantificationData::GetArrayType() { return GetWStringFromGenericHdr(ARRAY_TYPE_PARAM_NAME); } void CHPQuantificationData::SetArrayType(const std::wstring& value) { SetWStringToGenericHdr(ARRAY_TYPE_PARAM_NAME, value, ARRAY_TYPE_MAX_LEN); } std::wstring CHPQuantificationData::GetAlgName() { return GetWStringFromGenericHdr(ALGORITHM_NAME_PARAM_NAME); } void CHPQuantificationData::SetAlgName(const std::wstring& value) { SetWStringToGenericHdr(ALGORITHM_NAME_PARAM_NAME, value); } std::wstring CHPQuantificationData::GetAlgVersion() { return GetWStringFromGenericHdr(ALG_VERSION_PARAM_NAME); } void CHPQuantificationData::SetAlgVersion(const std::wstring& value) { SetWStringToGenericHdr(ALG_VERSION_PARAM_NAME, value); } ParameterNameValueTypeList CHPQuantificationData::GetAlgParams() { ParameterNameValueTypeList nvt; 
ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, ALGORITHM_PARAM_NAME_PREFIX_S.size(),ALGORITHM_PARAM_NAME_PREFIX_S) == 0) { param = *begin; key.erase(0, ALGORITHM_PARAM_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPQuantificationData::AddAlgParams(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); it++) { param = *it; param.SetName(ALGORITHM_PARAM_NAME_PREFIX_S + param.GetName()); hdr->AddNameValParam(param); } } ParameterNameValueTypeList CHPQuantificationData::GetSummaryParams() { ParameterNameValueTypeList nvt; ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size(),CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S) == 0) { param = *begin; key.erase(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPQuantificationData::AddSummaryParams(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); it++) { param = *it; param.SetName(CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S + param.GetName()); hdr->AddNameValParam(param); } } std::wstring CHPQuantificationData::GetWStringFromGenericHdr(const std::wstring& name) { std::wstring result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if 
(hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueText(); } return result; } void CHPQuantificationData::SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueText(value, reserve); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } affxparser/src/fusion/calvin_files/data/src/CHPQuantificationData.h0000644000175200017520000001232714516003651026447 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPQuantificationData_HEADER_ #define _CHPQuantificationData_HEADER_ #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/data/src/ProbeSetQuantificationData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include // namespace affymetrix_calvin_io { /*! The identifier to identify a quantification CHP file. */ #define CHP_QUANTIFICATION_TYPE "affymetrix-quantification-analysis" /*! Holds data associated with quantification CHP files. */ class CHPQuantificationData { public: /*! 
Constructor */ CHPQuantificationData(); /*! Constructor with file name. * @param filename The name of the CHP file. */ CHPQuantificationData(const std::string& filename); /*! Destructor */ ~CHPQuantificationData(); private: /*! Flag indicating if the probe set names were stored in wide character format. */ DataSetColumnTypes firstColumnType; /*! The generic data item. */ GenericData genericData; /*! chp data sets */ DataSet* entries; /*! The maximum length of a probe set name. */ int maxProbeSetName; public: /*! The maximum length of a probe set name. */ int GetMaxProbeSetName() const { return maxProbeSetName; } /*! Clears the members. */ void Clear(); /*! Sets the file name. * @param p The name of the CHP file */ void SetFilename(const std::string &p); /*! Gets the file name. * @return The file name. */ std::string GetFilename() const; /*! Sets the array type */ std::wstring GetArrayType(); /*! Sets the array type. */ void SetArrayType(const std::wstring& value); /*! Sets the number of entries (probe sets). Use this function if names are to be stored. * @param ln The number of probe sets. * @param maxln The maximum length of a probe set name. */ void SetEntryCount(int32_t ln, int32_t maxln); /*! Sets the number of entries (probe sets). Use this function if ids are to be stored. * @param ln The number of probe sets. */ void SetEntryCount(int32_t ln); /*! Gets the number of entries (probe sets) */ int32_t GetEntryCount(); /*! Gets the name of the algorithm. * @return The algorithm name. */ std::wstring GetAlgName(); /*! Sets the name of the algorithm. * @param value The algorithm name. */ void SetAlgName(const std::wstring& value); /*! Gets the algorithm version. * @return The version. */ std::wstring GetAlgVersion(); /*! Sets the algorithm version. * @param value The version. */ void SetAlgVersion(const std::wstring& value); /*! Gets the algorithm parameters * @return The algoirhtm parameters. */ ParameterNameValueTypeList GetAlgParams(); /*! 
Adds the algorithm parameters. * @param params The algorithm parameters. */ void AddAlgParams(const ParameterNameValueTypeList& params); /*! Gets the summary parameters * @return The summary parameters. */ ParameterNameValueTypeList GetSummaryParams(); /*! Adds the summary parameters. * @param params The summary parameters. */ void AddSummaryParams(const ParameterNameValueTypeList& params); /*! Gets the file header. * @return The file header. */ FileHeader* GetFileHeader() { return &genericData.Header(); } /*! Gets the generic data object. * @return The data object. */ GenericData& GetGenericData() { return genericData; } /*! Gets the sequence data. * @param index The row index. * @param entry The quantification value. */ void GetQuantificationEntry(int index, affymetrix_calvin_data::ProbeSetQuantificationData &entry); /*! Opens a group for reading. */ void OpenQuantificationDataSet(); /*! Returns the data set header. */ DataSetHeader &GetDataSetHeader(); private: /*! Gets a parameter value as a string. * @param name The name of the parameter. * @return The string representation. */ std::wstring GetWStringFromGenericHdr(const std::wstring& name); /*! Adds a parameter name/value pair to the parameters section. * @param name The name of the parameter. * @param value The value of the parameter. * @param reserve The length of the string to reserve. */ void SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve=-1); /*! Adds the columns to the data set. * @param hdr The data set header. * @param keyIsID True if the probe set ids are to be stored, false for probe set names. */ void AddColumns(DataSetHeader& hdr, bool keyIsID); }; } #endif affxparser/src/fusion/calvin_files/data/src/CHPQuantificationDetectionData.cpp0000644000175200017520000002131414516003651030635 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPQuantificationDetectionData.h" // #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/utils/src/StringUtils.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; /*! The group and data set to store the quantification/detection data. */ const static std::wstring QUANTIFICATION_DETECTION_NAME = L"QuantificationDetection"; /*! The column name for the probe set name. */ const static std::wstring QUANTIFICATION_DETECTION_PROBE_SET_NAME = L"ProbeSetName"; /*! The column name for the probe set id. */ const static std::wstring QUANTIFICATION_DETECTION_PROBE_SET_ID = L"ProbeSetId"; /*! The column name for the quantification value. */ const static std::wstring QUANTIFICATION_DETECTION_QUANTIFICATION_NAME = L"Quantification"; /*! The column name for the detection value. 
*/ const static std::wstring QUANTIFICATION_DETECTION_DETECTION_NAME = L"Detection"; CHPQuantificationDetectionData::CHPQuantificationDetectionData() { maxProbeSetName = -1; firstColumnType = UnicodeCharColType; entries = NULL; Clear(); } CHPQuantificationDetectionData::CHPQuantificationDetectionData(const std::string& filename) { maxProbeSetName = -1; firstColumnType = UnicodeCharColType; entries = NULL; Clear(); SetFilename(filename); DataGroupHeader dcHdr(QUANTIFICATION_DETECTION_NAME); genericData.Header().AddDataGroupHdr(dcHdr); genericData.Header().GetGenericDataHdr()->SetFileTypeId(CHP_QUANTIFICATION_DETECTION_TYPE); } CHPQuantificationDetectionData::~CHPQuantificationDetectionData() { Clear(); } DataSetHeader &CHPQuantificationDetectionData::GetDataSetHeader() { return genericData.Header().GetDataGroup(0).GetDataSet(0); } void CHPQuantificationDetectionData::Clear() { if (entries){ entries->Delete(); entries = 0; } genericData.Header().Clear(); } void CHPQuantificationDetectionData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } std::string CHPQuantificationDetectionData::GetFilename() const { return ((GenericData&)genericData).Header().GetFilename(); } int32_t CHPQuantificationDetectionData::GetEntryCount() { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); DataSetHeader dpHdr = dcHdr->GetDataSet(0); return dpHdr.GetRowCnt(); } void CHPQuantificationDetectionData::SetEntryCount(int32_t ln, int32_t maxln) { firstColumnType = ASCIICharColType; maxProbeSetName = maxln; ParameterNameValueType param; DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(QUANTIFICATION_DETECTION_NAME); AddColumns(dpHdr, false); dcHdr->AddDataSetHdr(dpHdr); } void CHPQuantificationDetectionData::SetEntryCount(int32_t ln) { firstColumnType = IntColType; maxProbeSetName = -1; ParameterNameValueType param; DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); 
DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(QUANTIFICATION_DETECTION_NAME); AddColumns(dpHdr, true); dcHdr->AddDataSetHdr(dpHdr); } void CHPQuantificationDetectionData::GetQuantificationDetectionEntry(int index, affymetrix_calvin_data::ProbeSetQuantificationDetectionData &entry) { OpenQuantificationDetectionDataSet(); if (entries && entries->IsOpen()) { entry.id = -1; entry.name.clear(); if (firstColumnType == ASCIICharColType) entries->GetData(index, 0, entry.name); else if (firstColumnType == UnicodeCharColType) { std::wstring wprobeSetName; entries->GetData(index, 0, wprobeSetName); entry.name = StringUtils::ConvertWCSToMBS(wprobeSetName); } else if (firstColumnType == IntColType) entries->GetData(index, 0, entry.id); entries->GetData(index, 1, entry.quantification); entries->GetData(index, 2, entry.pvalue); } } void CHPQuantificationDetectionData::AddColumns(DataSetHeader& hdr, bool keyIsID) { if (keyIsID == false) hdr.AddAsciiColumn(QUANTIFICATION_DETECTION_PROBE_SET_NAME, maxProbeSetName); else hdr.AddIntColumn(QUANTIFICATION_DETECTION_PROBE_SET_ID); hdr.AddFloatColumn(QUANTIFICATION_DETECTION_QUANTIFICATION_NAME); hdr.AddFloatColumn(QUANTIFICATION_DETECTION_DETECTION_NAME); } void CHPQuantificationDetectionData::OpenQuantificationDetectionDataSet() { if (entries == NULL) { entries = genericData.DataSet(0, 0); if (entries) { entries->Open(); firstColumnType = entries->Header().GetColumnInfo(0).GetColumnType(); } } } std::wstring CHPQuantificationDetectionData::GetArrayType() { return GetWStringFromGenericHdr(ARRAY_TYPE_PARAM_NAME); } void CHPQuantificationDetectionData::SetArrayType(const std::wstring& value) { SetWStringToGenericHdr(ARRAY_TYPE_PARAM_NAME, value, ARRAY_TYPE_MAX_LEN); } std::wstring CHPQuantificationDetectionData::GetAlgName() { return GetWStringFromGenericHdr(ALGORITHM_NAME_PARAM_NAME); } void CHPQuantificationDetectionData::SetAlgName(const std::wstring& value) { SetWStringToGenericHdr(ALGORITHM_NAME_PARAM_NAME, value); } 
std::wstring CHPQuantificationDetectionData::GetAlgVersion() { return GetWStringFromGenericHdr(ALG_VERSION_PARAM_NAME); } void CHPQuantificationDetectionData::SetAlgVersion(const std::wstring& value) { SetWStringToGenericHdr(ALG_VERSION_PARAM_NAME, value); } ParameterNameValueTypeList CHPQuantificationDetectionData::GetAlgParams() { ParameterNameValueTypeList nvt; ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, ALGORITHM_PARAM_NAME_PREFIX_S.size(),ALGORITHM_PARAM_NAME_PREFIX_S) == 0) { param = *begin; key.erase(0, ALGORITHM_PARAM_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPQuantificationDetectionData::AddAlgParams(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); it++) { param = *it; param.SetName(ALGORITHM_PARAM_NAME_PREFIX_S + param.GetName()); hdr->AddNameValParam(param); } } ParameterNameValueTypeList CHPQuantificationDetectionData::GetSummaryParams() { ParameterNameValueTypeList nvt; ParameterNameValueTypeIt begin, end; ParameterNameValueType param; genericData.Header().GetGenericDataHdr()->GetNameValIterators(begin, end); while(begin != end) { std::wstring key = begin->GetName(); if(key.compare(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size(),CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S) == 0) { param = *begin; key.erase(0, CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S.size()); param.SetName(key); nvt.push_back(param); } begin++; } return nvt; } void CHPQuantificationDetectionData::AddSummaryParams(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for 
(ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); it++) { param = *it; param.SetName(CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S + param.GetName()); hdr->AddNameValParam(param); } } std::wstring CHPQuantificationDetectionData::GetWStringFromGenericHdr(const std::wstring& name) { std::wstring result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueText(); } return result; } void CHPQuantificationDetectionData::SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueText(value, reserve); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } affxparser/src/fusion/calvin_files/data/src/CHPQuantificationDetectionData.h0000644000175200017520000001254314516003651030306 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _CHPQuantificationDetectionData_HEADER_
#define _CHPQuantificationDetectionData_HEADER_

#include "calvin_files/data/src/FileHeader.h"
#include "calvin_files/data/src/GenericData.h"
#include "calvin_files/data/src/ProbeSetQuantificationDetectionData.h"
#include "calvin_files/portability/src/AffymetrixBaseTypes.h"
//
// NOTE(review): the targets of the two #include directives below are missing
// in this copy (angle-bracket text appears to have been stripped); restore
// them from the upstream Fusion SDK header - TODO confirm.
#include
#include
//

namespace affymetrix_calvin_io
{

/*! The file type identifier of a quantification/detection CHP file. */
#define CHP_QUANTIFICATION_DETECTION_TYPE "affymetrix-quantification-detection-analysis"

/*! Holds data associated with quantification/detection CHP files. */
class CHPQuantificationDetectionData
{
public:
	/*! Constructor */
	CHPQuantificationDetectionData();

	/*! Constructor with file name.
	 * @param filename The name of the CHP file.
	 */
	CHPQuantificationDetectionData(const std::string& filename);

	/*! Destructor */
	~CHPQuantificationDetectionData();

private:
	/*! The type of the first column of the data set: ASCII or Unicode
	 * probe set names, or integer probe set ids. */
	DataSetColumnTypes firstColumnType;

	/*! The generic data item. */
	GenericData genericData;

	/*! The data set holding the CHP entries; NULL until it is opened. */
	DataSet* entries;

	/*! The maximum length of a probe set name (-1 when ids are stored). */
	int maxProbeSetName;

public:
	/*! Gets the maximum length of a probe set name.
	 * @return The maximum length.
	 */
	int GetMaxProbeSetName() const { return maxProbeSetName; }

	/*! Clears the members. */
	void Clear();

	/*! Sets the file name.
	 * @param p The name of the CHP file
	 */
	void SetFilename(const std::string &p);

	/*! Gets the file name.
	 * @return The file name.
	 */
	std::string GetFilename() const;

	/*! Gets the array type.
	 * @return The array type.
	 */
	std::wstring GetArrayType();

	/*! Sets the array type.
	 * @param value The array type.
	 */
	void SetArrayType(const std::wstring& value);

	/*! Sets the number of entries (probe sets). Use this function if names are to be stored.
	 * @param ln The number of probe sets.
	 * @param maxln The maximum length of a probe set name.
	 */
	void SetEntryCount(int32_t ln, int32_t maxln);

	/*! Sets the number of entries (probe sets). Use this function if ids are to be stored.
	 * @param ln The number of probe sets.
	 */
	void SetEntryCount(int32_t ln);

	/*! Gets the number of entries (probe sets).
	 * @return The number of entries.
	 */
	int32_t GetEntryCount();

	/*! Gets the name of the algorithm.
	 * @return The algorithm name.
	 */
	std::wstring GetAlgName();

	/*! Sets the name of the algorithm.
	 * @param value The algorithm name.
	 */
	void SetAlgName(const std::wstring& value);

	/*! Gets the algorithm version.
	 * @return The version.
	 */
	std::wstring GetAlgVersion();

	/*! Sets the algorithm version.
	 * @param value The version.
	 */
	void SetAlgVersion(const std::wstring& value);

	/*! Gets the algorithm parameters.
	 * @return The algorithm parameters.
	 */
	ParameterNameValueTypeList GetAlgParams();

	/*! Adds the algorithm parameters.
	 * @param params The algorithm parameters.
	 */
	void AddAlgParams(const ParameterNameValueTypeList& params);

	/*! Gets the summary parameters.
	 * @return The summary parameters.
	 */
	ParameterNameValueTypeList GetSummaryParams();

	/*! Adds the summary parameters.
	 * @param params The summary parameters.
	 */
	void AddSummaryParams(const ParameterNameValueTypeList& params);

	/*! Gets the file header.
	 * @return The file header.
	 */
	FileHeader* GetFileHeader() { return &genericData.Header(); }

	/*! Gets the generic data object.
	 * @return The data object.
	 */
	GenericData& GetGenericData() { return genericData; }

	/*! Gets the quantification/detection entry stored in a row.
	 * @param index The row index.
	 * @param entry The quantification/detection value.
	 */
	void GetQuantificationDetectionEntry(int index, affymetrix_calvin_data::ProbeSetQuantificationDetectionData &entry);

	/*! Opens the quantification/detection data set for reading. */
	void OpenQuantificationDetectionDataSet();

	/*! Returns the data set header. */
	DataSetHeader &GetDataSetHeader();

private:
	/*! Gets a parameter value as a string.
	 * @param name The name of the parameter.
	 * @return The string representation.
	 */
	std::wstring GetWStringFromGenericHdr(const std::wstring& name);

	/*! Adds a parameter name/value pair to the parameters section.
	 * @param name The name of the parameter.
	 * @param value The value of the parameter.
	 * @param reserve The length of the string to reserve.
	 */
	void SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve=-1);

	/*! Adds the columns to the data set.
	 * @param hdr The data set header.
	 * @param keyIsID True if the probe set ids are to be stored, false for probe set names.
	 */
	void AddColumns(DataSetHeader& hdr, bool keyIsID);
};

}

#endif
affxparser/src/fusion/calvin_files/data/src/CHPReseqEntry.h0000644000175200017520000000506214516003651024776 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _CHPReseqEntry_HEADER_
#define _CHPReseqEntry_HEADER_

/*! \file CHPReseqEntry.h Defines structures to store the resequencing analysis results. */

#include "calvin_files/portability/src/AffymetrixBaseTypes.h"
//
// NOTE(review): the targets of the two #include directives below are missing
// in this copy (angle-bracket text appears to have been stripped); restore
// them from the upstream Fusion SDK header - TODO confirm.
#include
#include
//

namespace affymetrix_calvin_io
{

/*! The force call was made due to no signal threshold. */
#define CC_NO_SIGNAL_THR_FORCE_CALL 'N'

/*! The force call was made due to weak signal threshold. */
#define CC_WEAK_SIGNAL_THR_FORCE_CALL 'W'

/*! The force call was made due to saturation level. */
#define CC_SATURATION_LEVEL_FORCE_CALL 'S'

/*! The force call was made due to quality score threshold. */
#define CC_QUALITY_SCORE_THR_FORCE_CALL 'Q'

/*! The force call was made due to failed both trace and sequence profiles. */
#define CC_TRACE_AND_SEQUENCE_PROFILES_FORCE_CALL 'F'

/*! The force call was made due to base reliability threshold. */
#define CC_RELIABILITY_THR_FORCE_CALL 'B'

/*! A structure to hold a force call, its position and reason.
 *
 * A force call is the call the algorithm would have made if the thresholds
 * were not applied.
 */
typedef struct _CHPReseqForceCall
{
	/*! The position (index) of the call. */
	int position;

	/*! The call at the given position. */
	char call;

	/*! The reason for the call (one of the CC_*_FORCE_CALL codes above). */
	char reason;
} CHPReseqForceCall;

/*! A structure to hold a base call at a given position (index). */
typedef struct _CHPReseqOrigCall
{
	/*! The position (index) of the call. */
	int position;

	/*! The call at the given position. */
	char call;
} CHPReseqOrigCall;

/*! A structure to hold the base call and score. */
typedef struct _CHPReseqEntry
{
	/*! The call. */
	char call;

	/*! The score for the call. */
	float score;
} CHPReseqEntry;

}

#endif
affxparser/src/fusion/calvin_files/data/src/CHPTilingData.cpp0000644000175200017520000001641314516003651025252 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPTilingData.h" // #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/utils/src/StringUtils.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; // Constant column names. static const std::wstring GenomicPositionColName = L"Genomic Position"; static const std::wstring ValueColName = L"Result"; CHPTilingData::CHPTilingData() { entries = 0; Clear(); } CHPTilingData::CHPTilingData(const std::string& filename) { entries = 0; Clear(); SetFilename(filename); DataGroupHeader dcHdr(CHP_TILING_GROUP); genericData.Header().AddDataGroupHdr(dcHdr); genericData.Header().GetGenericDataHdr()->SetFileTypeId(CHP_TILING_TYPE); } void CHPTilingData::GetTilingSequenceEntry(int row, CHPTilingEntry& e) { if (entries && entries->IsOpen()) { entries->GetData(row, 0, e.position); entries->GetData(row, 1, e.value); } } CHPTilingData::~CHPTilingData() { Clear(); } void CHPTilingData::Clear() { if (entries){ entries->Delete(); entries = 0; } genericData.Header().Clear(); cachedNumSequences = -1; } void CHPTilingData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } std::string CHPTilingData::GetFilename() const { return ((GenericData&)genericData).Header().GetFilename(); } int32_t CHPTilingData::GetTilingSequenceEntryCount(int index) { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); 
DataSetHeader dpHdr = dcHdr->GetDataSet(index); return dpHdr.GetRowCnt(); } void CHPTilingData::AddTilingSequenceData(int32_t numEntries, const TilingSequenceData &data) { ParameterNameValueType param; DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); int nSets = dcHdr->GetDataSetCnt(); DataSetHeader dpHdr; dpHdr.SetRowCnt(numEntries); wchar_t name[65]; FormatString1(name, 65, L"%d", nSets); dpHdr.SetName(name); param.SetName(TILING_SEQ_NAME); param.SetValueText(data.name); dpHdr.AddNameValParam(param); param.SetName(TILING_SEQ_GROUP_NAME); param.SetValueText(data.groupName); dpHdr.AddNameValParam(param); param.SetName(TILING_SEQ_VERSION); param.SetValueText(data.version); dpHdr.AddNameValParam(param); for (ParameterNameValueTypeList::const_iterator it=data.parameters.begin(); it != data.parameters.end(); it++) { dpHdr.AddNameValParam(*it); } AddColumns(dpHdr); dcHdr->AddDataSetHdr(dpHdr); } TilingSequenceData CHPTilingData::GetTilingSequenceData() { ParameterNameValueType param; TilingSequenceData data; ParameterNameValueTypeConstIt begin; ParameterNameValueTypeConstIt end; if (entries && entries->IsOpen()) { const DataSetHeader &hdr = entries->Header(); hdr.GetNameValIterators(begin, end); for (ParameterNameValueTypeConstIt it=begin; it != end; it++) { const ParameterNameValueType ¶m = *it; if (param.GetName() == TILING_SEQ_NAME) { data.name = param.GetValueText(); } else if (param.GetName() == TILING_SEQ_GROUP_NAME) { data.groupName = param.GetValueText(); } else if (param.GetName() == TILING_SEQ_VERSION) { data.version = param.GetValueText(); } else { data.parameters.push_back(param); } } } return data; } void CHPTilingData::AddColumns(DataSetHeader& hdr) { hdr.AddUIntColumn(GenomicPositionColName); //genomic position - unsigned int 32 hdr.AddFloatColumn(ValueColName); //value - float } void CHPTilingData::OpenTilingSequenceDataSet(int index) { if (entries) entries->Delete(); entries = genericData.DataSet(0, index); if (entries) entries->Open(); } 
std::wstring CHPTilingData::GetAlgName() { return GetWStringFromGenericHdr(TILING_ALG_NAME); } void CHPTilingData::SetAlgName(const std::wstring& value) { SetWStringToGenericHdr(TILING_ALG_NAME, value); } std::wstring CHPTilingData::GetAlgVersion() { return GetWStringFromGenericHdr(TILING_ALG_VERSION); } void CHPTilingData::SetAlgVersion(const std::wstring& value) { SetWStringToGenericHdr(TILING_ALG_VERSION, value); } int32_t CHPTilingData::GetNumberSequences() { if (cachedNumSequences == -1) cachedNumSequences = GetInt32FromGenericHdr(TILING_NUM_SEQS); return cachedNumSequences; } void CHPTilingData::SetNumberSequences(int32_t value) { SetInt32ToGenericHdr(TILING_NUM_SEQS, value); cachedNumSequences = value; } ParameterNameValueTypeList CHPTilingData::GetAlgParams() { const int len = (int)TILING_PARAM_SUFFIX.length(); int index; ParameterNameValueType param; ParameterNameValueTypeList params; ParameterNameValueTypeIt begin; ParameterNameValueTypeIt end; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->GetNameValIterators(begin, end); for (ParameterNameValueTypeIt it=begin; it != end; it++) { param = *it; index = (int) param.GetName().find(TILING_PARAM_SUFFIX); if (index == 0) { param.SetName( param.GetName().substr(len, param.GetName().length()-len)); params.push_back(param); } } return params; } void CHPTilingData::AddAlgParams(const ParameterNameValueTypeList& params) { ParameterNameValueType param; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it != params.end(); it++) { param = *it; param.SetName(TILING_PARAM_SUFFIX + param.GetName()); hdr->AddNameValParam(param); } } std::wstring CHPTilingData::GetWStringFromGenericHdr(const std::wstring& name) { std::wstring result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueText(); } 
return result; } void CHPTilingData::SetWStringToGenericHdr(const std::wstring& name, const std::wstring value) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueText(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } int32_t CHPTilingData::GetInt32FromGenericHdr(const std::wstring& name) { int32_t result = 0; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueInt32(); } return result; } void CHPTilingData::SetInt32ToGenericHdr(const std::wstring& name, int32_t value) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueInt32(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } affxparser/src/fusion/calvin_files/data/src/CHPTilingData.h0000644000175200017520000001435114516003651024716 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPTilingData_HEADER_ #define _CHPTilingData_HEADER_ #include "calvin_files/data/src/CHPTilingEntry.h" #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/data/src/TilingResultData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include // namespace affymetrix_calvin_io { /*! The identifier to identify a tiling CHP file. */ #define CHP_TILING_TYPE std::string("affymetrix-tiling-analysis") /*! The identifier for the type of data stored in the file. */ #define TILING_DATA_TYPE std::wstring(L"file_type") /*! The prefix prepended to the name of each algorithm parameter stored in the header. */ #define TILING_PARAM_SUFFIX std::wstring(L"Param-") /*! The identifier for the number of sequences. */ #define TILING_NUM_SEQS std::wstring(L"NumberSequences") /*! The identifier for the algorithm name. */ #define TILING_ALG_NAME std::wstring(L"Algorithm-Name") /*! The identifier for the algorithm version. */ #define TILING_ALG_VERSION std::wstring(L"Algorithm-Version") /*! The name of the tiling data group. */ #define CHP_TILING_GROUP std::wstring(L"Tiling Results") /*! The id for the sequence name. */ #define TILING_SEQ_NAME std::wstring(L"Name") /*! The id for the sequence group name. */ #define TILING_SEQ_GROUP_NAME std::wstring(L"GroupName") /*! The id for the sequence version. */ #define TILING_SEQ_VERSION std::wstring(L"Version") /*! The value to indicate signal values are stored in the CHP file. */ #define TILING_SIGNAL_VALUES std::wstring(L"Signal") /*! The value to indicate p-values are stored in the CHP file. */ #define TILING_PVALUE_VALUES std::wstring(L"p-value") /*! Holds data associated with tiling array CHP files.
*/ class CHPTilingData { public: /*! Constructor */ CHPTilingData(); /*! Constructor with file name. * @param filename The name of the CHP file. */ CHPTilingData(const std::string& filename); /*! Destructor */ ~CHPTilingData(); private: /*! The generic data item. */ GenericData genericData; /*! Cached number of sequences (-1 until first read) so the header is not queried on every call. */ int32_t cachedNumSequences; /*! chp data sets */ DataSet* entries; public: /*! Clears the members. */ void Clear(); /*! Sets the file name. * @param p The name of the CHP file */ void SetFilename(const std::string &p); /*! Gets the file name. * @return The file name. */ std::string GetFilename() const; /*! Gets the number of sequences. * @return The number of sequences. */ int32_t GetNumberSequences(); /*! Sets the number of sequences. * @param value The number of sequences. */ void SetNumberSequences(int32_t value); /*! Gets the name of the algorithm. * @return The algorithm name. */ std::wstring GetAlgName(); /*! Sets the name of the algorithm. * @param value The algorithm name. */ void SetAlgName(const std::wstring& value); /*! Gets the algorithm version. * @return The version. */ std::wstring GetAlgVersion(); /*! Sets the algorithm version. * @param value The version. */ void SetAlgVersion(const std::wstring& value); /*! Gets the algorithm parameters * @return The algorithm parameters. */ ParameterNameValueTypeList GetAlgParams(); /*! Adds the algorithm parameters. * @param params The algorithm parameters. */ void AddAlgParams(const ParameterNameValueTypeList& params); /*! Gets the file header. * @return The file header. */ FileHeader* GetFileHeader() { return &genericData.Header(); } /*! Gets the generic data object. * @return The data object. */ GenericData& GetGenericData() { return genericData; } /*! Adds a data group to the file. * @param numEntries The number of entries in the sequence. * @param data The data associated with the sequence.
*/ void AddTilingSequenceData(int32_t numEntries, const TilingSequenceData &data); /*! Gets the sequence data. * @return The data associated with the sequence. */ TilingSequenceData GetTilingSequenceData(); /*! Gets the number of entries in a tiling sequence. * @param index The sequence index. * @return The number of entries in the sequence. */ int32_t GetTilingSequenceEntryCount(int index); /*! Opens a group for reading. * @param index The index to the sequence. */ void OpenTilingSequenceDataSet(int index); /*! Returns the entry for the given row. The data set must be open. * @param row The row index. * @param e The entry. */ void GetTilingSequenceEntry(int row, CHPTilingEntry& e); private: /*! Gets a parameter value as a string. * @param name The name of the parameter. * @return The string representation. */ std::wstring GetWStringFromGenericHdr(const std::wstring& name); /*! Adds a parameter name/value pair to the parameters section. * @param name The name of the parameter. * @param value The value of the parameter. */ void SetWStringToGenericHdr(const std::wstring& name, const std::wstring value); /*! Gets a parameter value as an integer. * @param name The name of the parameter. * @return The integer representation. */ int32_t GetInt32FromGenericHdr(const std::wstring& name); /*! Adds a parameter name/value pair to the parameters section. * @param name The name of the parameter. * @param value The value of the parameter. */ void SetInt32ToGenericHdr(const std::wstring& name, int32_t value); /*! Adds the columns to the data set. * @param hdr The data set header. */ void AddColumns(DataSetHeader& hdr); }; } #endif affxparser/src/fusion/calvin_files/data/src/CHPTilingEntry.h0000644000175200017520000000246614516003651025152 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPTilingEntry_HEADER_ #define _CHPTilingEntry_HEADER_ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // namespace affymetrix_calvin_io { /*! Stores the data for a single genomic data item: a genomic position and the value associated with it. */ typedef struct _CHPTilingEntry { /*! The genomic position. */ u_int32_t position; /*! The value associated with the position. */ float value; } CHPTilingEntry; } #endif // _CHPTilingEntry_HEADER_ affxparser/src/fusion/calvin_files/data/src/CHPUniversalEntry.cpp0000644000175200017520000000265514516003651026227 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPUniversalEntry.h" // using namespace affymetrix_calvin_io; CHPUniversalEntry::CHPUniversalEntry() { Clear(); } CHPUniversalEntry::CHPUniversalEntry(float bg) { background = bg; } CHPUniversalEntry::~CHPUniversalEntry() {} void CHPUniversalEntry::Clear() { background = 0.0; } CHPUniversalEntry CHPUniversalEntry::operator=(CHPUniversalEntry zn) { background = zn.GetBackground(); return *this; } float CHPUniversalEntry::GetBackground() const { return background; } void CHPUniversalEntry::SetBackground(float p) { background = p; } affxparser/src/fusion/calvin_files/data/src/CHPUniversalEntry.h0000644000175200017520000000353114516003651025666 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPUniversalEntry_HEADER_ #define _CHPUniversalEntry_HEADER_ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class stores a zone's background value */ class CHPUniversalEntry { private: float background; public: CHPUniversalEntry(); CHPUniversalEntry(float background); ~CHPUniversalEntry(); void Clear(); /*! Assignment operator * @param zn The zone to copy * @return A copy of this object after the assignment */ CHPUniversalEntry operator=(CHPUniversalEntry zn); float GetBackground() const; void SetBackground(float p); }; /*! An STL vector of zones */ typedef std::vector CHPUniversalEntryVector; /*! Iterator over a vector of CHPUniversalEntry objects */ typedef std::vector::iterator CHPUniversalEntryVectorIt; } #endif // _CHPUniversalEntry_HEADER_ affxparser/src/fusion/calvin_files/data/src/ChromosomeMultiDataSummaryData.h0000644000175200017520000000367714516003651030444 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ChromosomeMultiDataSummaryData_HEADER_ #define _ChromosomeMultiDataSummaryData_HEADER_ /*! \file ChromosomeMultiDataSummaryData.h This file provides types to hold chromosome multi data results. */ #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // namespace affymetrix_calvin_data { /*! Holds the summary results for a single chromosome. */ typedef struct _ChromosomeMultiDataSummaryData { /*! The chromosome number. */ u_int8_t chr; /*! The chromosome number for display. */ std::string display; /*! The starting index */ u_int32_t startIndex; /*! The number of markers */ u_int32_t markerCount; /*! The minimum signal */ float minSignal; /*! The maximum signal */ float maxSignal; /*! The median copy number state */ float medianCnState; /*! The hom frequency */ float homFrequency; /*! The het frequency */ float hetFrequency; /*! Other metrics. */ std::vector metrics; } ChromosomeMultiDataSummaryData; } #endif affxparser/src/fusion/calvin_files/data/src/ChromosomeSegmentMultiDataData.h0000644000175200017520000000532614516003651030402 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ChromosomeSegmentMultiDataData_HEADER_ #define _ChromosomeSegmentMultiDataData_HEADER_ /*! \file ChromosomeSegmentMultiDataData.h This file provides types to store results for a segment. */ // #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include #include // namespace affymetrix_calvin_data { /*! Holds the results for a chromosome segment. */ typedef struct _ChromosomeSegmentData { /*! The segment id */ u_int32_t segmentId; /*! The chromosome number. */ u_int8_t chr; /*! The start position on the chromosome. */ u_int32_t startPosition; /*! The stop position on the chromosome. */ u_int32_t stopPosition; /*! The number of markers in the segment. */ int markerCount; /*! The mean marker distance. */ u_int32_t meanMarkerDistance; /*! Other metrics associated with the segment. */ std::vector metrics; } ChromosomeSegmentData; /*! Holds the extended results for a chromosome segment: sample keys, call, confidence and zygosity values in addition to the position data. */ typedef struct _ChromosomeSegmentDataEx { /*! The segment id */ u_int32_t segmentId; /*! The reference sample key. */ u_int32_t referenceSampleKey; /*! The familial sample key. */ u_int32_t familialSampleKey; /*! The chromosome number. */ u_int8_t chr; /*! The start position on the chromosome. */ u_int32_t startPosition; /*! The stop position on the chromosome. */ u_int32_t stopPosition; /*! The segment call */ u_int8_t call; /*! The confidence in the call */ float confidence; /*! The number of markers in the segment. */ int markerCount; /*! The homozygosity */ float homozygosity; /*! The heterozygosity */ float heterozygosity; /*! Other metrics associated with the segment.
*/ std::vector metrics; } ChromosomeSegmentDataEx; } #endif affxparser/src/fusion/calvin_files/data/src/ColumnInfo.cpp0000644000175200017520000000425514516003651024751 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file ColumnInfo.cpp This file defines column information class of various types. */ #include "calvin_files/data/src/ColumnInfo.h" namespace affymetrix_calvin_io { /*! Constructor - used only by dervied types * @param name_ Name of the column. * @param type_ Type of data in the column. * @param size_ Size of each element of in the column. * @param len_ Number of elements of type in the column. * @param overhead_ Number of extra bytes in the column */ ColumnInfo::ColumnInfo(const std::wstring& name_, DataSetColumnTypes type_, int32_t size_, int32_t len_, int32_t overhead_) : name(name_), type(type_), size(size_), len(len_), overhead(overhead_){ } /*! Constructor - used by the file read operation * @param name_ Name of the column. * @param type_ Type of data in the column. * @param totalSize Total size of the colum in bytes. 
*/ ColumnInfo::ColumnInfo(const std::wstring& name_, DataSetColumnTypes type_, int32_t totalSize) : name(name_), type(type_), size(totalSize), len(1), overhead(0) { if (type == ASCIICharColType || type == UnicodeCharColType) { overhead = 4; if (type == UnicodeCharColType) { size = sizeof(int16_t); len = (totalSize - overhead) / sizeof(int16_t); } else if (type == ASCIICharColType) { size = sizeof(int8_t); len = (totalSize - overhead) / sizeof(int8_t); } } } }; affxparser/src/fusion/calvin_files/data/src/ColumnInfo.h0000644000175200017520000001236014516003651024412 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ColumnInfo_HEADER_ #define _ColumnInfo_HEADER_ /*! \file ColumnInfo.h This file defines column information class of various types. */ // #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! 
DataSet column data types */ enum DataSetColumnTypes { ByteColType, UByteColType, ShortColType, UShortColType, IntColType, UIntColType, FloatColType, ASCIICharColType, UnicodeCharColType }; /*! Base class for the various columns */ class ColumnInfo { private: /*! Name of the column. */ std::wstring name; /*! Type of data in this column */ DataSetColumnTypes type; /*! size of an individual element in bytes */ int32_t size; /*! number of elements in column */ int32_t len; /*! overhead size in bytes */ int32_t overhead; protected: /*! Constructor - used only by derived types * @param name_ Name of the column. * @param type_ Type of data in the column. * @param size_ Size of each element in the column. * @param len_ Number of elements of type in the column. * @param overhead_ Number of extra bytes in the column */ ColumnInfo(const std::wstring& name_, DataSetColumnTypes type_, int32_t size_, int32_t len_, int32_t overhead_); public: /*! Constructor - used by the file read operation * @param name_ Name of the column. * @param type_ Type of data in the column. * @param totalSize Total size of the column in bytes. */ ColumnInfo(const std::wstring& name_, DataSetColumnTypes type_, int32_t totalSize); /*! Equality operator; compares name, type and element size. * @param p object to compare against */ bool operator==(const ColumnInfo &p) const { return (name == p.name && type == p.type && size == p.size); } /*! Inequality operator; compares name, type and element size. * @param p object to compare against */ bool operator!=(const ColumnInfo &p) const { return (name != p.name || type != p.type || size != p.size); } /*! Returns the type of the data in the column * @return The type of the data in the column */ DataSetColumnTypes GetColumnType() const { return type; } /*! Returns the total size of the column in bytes (element size times element count, plus overhead). * @return Size in bytes of the column */ int32_t GetSize() const { return size*len + overhead; } /*! 
Returns the number of elements of type in the column * @return Number of elements of type in the column */ int32_t GetLength() const { return len; } /*! Get the column name. * @return The column name. */ std::wstring GetName() { return name; } }; /*! Byte column */ class ByteColumn : public ColumnInfo { public: ByteColumn(const std::wstring& name_) : ColumnInfo(name_, ByteColType, sizeof(int8_t), 1, 0) {} }; /*! Unsigned byte column */ class UByteColumn : public ColumnInfo { public: UByteColumn(const std::wstring& name_) : ColumnInfo(name_, UByteColType, sizeof(u_int8_t), 1, 0) {} }; /*! Short column */ class ShortColumn : public ColumnInfo { public: ShortColumn(const std::wstring& name_) : ColumnInfo(name_, ShortColType, sizeof(int16_t), 1, 0) {} }; /*! Unsigned short column */ class UShortColumn : public ColumnInfo { public: UShortColumn(const std::wstring& name_) : ColumnInfo(name_, UShortColType, sizeof(u_int16_t), 1, 0) {} }; /*! Int (int32_t) column */ class IntColumn : public ColumnInfo { public: IntColumn(const std::wstring& name_) : ColumnInfo(name_, IntColType, sizeof(int32_t), 1, 0) {} }; /*! Unsigned int (u_int32_t) column */ class UIntColumn : public ColumnInfo { public: UIntColumn(const std::wstring& name_) : ColumnInfo(name_, UIntColType, sizeof(u_int32_t), 1, 0) {} }; /*! Float column */ class FloatColumn : public ColumnInfo { public: FloatColumn(const std::wstring& name_) : ColumnInfo(name_, FloatColType, sizeof(float), 1, 0) {} }; /*! ASCII string column */ class ASCIIColumn : public ColumnInfo { public: ASCIIColumn(const std::wstring& name_, int32_t maxLn) : ColumnInfo(name_, ASCIICharColType, sizeof(int8_t), maxLn, 4) {} }; /* Unicode string column */ class UnicodeColumn : public ColumnInfo { public: UnicodeColumn(const std::wstring& name_, int32_t maxLn) : ColumnInfo(name_, UnicodeCharColType, sizeof(int16_t), maxLn, 4) {} }; /*! vector of column info */ typedef std::vector ColInfoVector; /*! 
iterator of column info */ typedef std::vector::const_iterator ColumnInfoConstIt; } #endif // _ColumnInfo_HEADER_ affxparser/src/fusion/calvin_files/data/src/CytoGenotypeCallMultiDataData.h0000644000175200017520000000352114516003651030164 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _Cyto_GenotypeCall_HEADER_ #define _Cyto_GenotypeCall_HEADER_ /*! \file CytoGenotypeCallMultiDataData.h This file provides types to hold genotyping results for cyto arrays. */ #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // namespace affymetrix_calvin_data { /*! Holds the marker AB signal results */ typedef struct _CytoGenotypeCallData { /*! The index relative to the probeset in the CopyNumber data set of the ProbeSet data group (0 based). */ u_int32_t index; /*! The call. */ u_int8_t call; /*! The confidence */ float confidence; /*! The forced call. */ u_int8_t forcedCall; /*! The A signal */ float aSignal; /*! The B signal */ float bSignal; /*! The signal strength */ float signalStrength; /*! The contrast */ float contrast; /*! The extra metrics. 
*/ std::vector metrics; } CytoGenotypeCallData; } #endif affxparser/src/fusion/calvin_files/data/src/DATData.cpp0000644000175200017520000002734414516003651024106 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/DATData.h" // #include "calvin_files/array/src/ArrayId.h" #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/data/src/GenericDataTypes.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; // Constant column names.
static const std::wstring MinIntensityColName = L"Min Intensity"; static const std::wstring MaxIntensityColName = L"Max Intensity"; DATData::DATData() { globalGridStatus = 0; dsPixel = 0; dsSMPixel = 0; dsStats = 0; Clear(); } DATData::DATData(const std::string &filename) { globalGridStatus = 0; dsPixel = 0; dsStats = 0; dsSMPixel = 0; Clear(); SetFilename(filename); genericData.Header().GetGenericDataHdr()->SetFileTypeId(SCAN_ACQUISITION_DATA_TYPE); DataGroupHeader dcHdr(DAT_DATAGROUP); genericData.Header().AddDataGroupHdr(dcHdr); } DATData::~DATData() { Clear(); } void DATData::Clear() { if (dsPixel){ dsPixel->Delete(); dsPixel = 0; } if (dsSMPixel){ dsSMPixel->Delete(); dsSMPixel = 0; } if (dsStats){ dsStats->Delete(); dsStats = 0; } globalGrid.Clear(); subgrids.clear(); genericData.Header().Clear(); setPixelMetaData = false; setStatsMetaData = false; cachedRows = -1; cachedCols = -1; } void DATData::SetFilename(const std::string &p) { genericData.Header().SetFilename(p); } std::string DATData::GetFilename() { return genericData.Header().GetFilename(); } void DATData::SetPixelCount(int32_t ln) { DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(DAT_PIXEL); dpHdr.AddUShortColumn(DAT_PIXEL); if(setPixelMetaData) { UpdateDataSetRowCount(dpHdr); } else { InsertDataSetHeader(dpHdr); setPixelMetaData = true; } } void DATData::SetStatsCount(int32_t ln) { DataSetHeader dpHdr; dpHdr.SetRowCnt(ln); dpHdr.SetName(DAT_STATS); dpHdr.AddUShortColumn(MinIntensityColName); dpHdr.AddUShortColumn(MaxIntensityColName); if(setStatsMetaData) { UpdateDataSetRowCount(dpHdr); } else { InsertDataSetHeader(dpHdr); setStatsMetaData = true; } } void DATData::UpdateDataSetRowCount(const DataSetHeader &hdr) { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); bool found = false; int sz = dcHdr->GetDataSetCnt(); for(int i = 0; i < sz; i++) { DataSetHeader* dpHdr = &dcHdr->GetDataSet(i); if(dpHdr->GetName() == hdr.GetName()) { dpHdr->SetRowCnt(hdr.GetRowCnt()); found = true; 
break; } } if(!found) { InsertDataSetHeader(hdr); } } void DATData::InsertDataSetHeader(const DataSetHeader &hdr) { DataGroupHeader* dcHdr = &genericData.Header().GetDataGroup(0); dcHdr->AddDataSetHdr(hdr); } std::wstring DATData::GetArrayType() { return GetWStringFromGenericHdr(ARRAY_TYPE_PARAM_NAME); } void DATData::SetArrayType(const std::wstring& value) { SetWStringToGenericHdr(ARRAY_TYPE_PARAM_NAME, value, ARRAY_TYPE_MAX_LEN); } /* * Get the array id. */ AffymetrixGuidType DATData::GetArrayId() { AffymetrixGuidType guid; GenericDataHeader *parentGDH = GetParentArrayGenericDataHeader(); ParameterNameValueType nvt; if (parentGDH->FindNameValParam(ARRAY_ID_PARAM_NAME, nvt)) { guid = nvt.GetValueAscii(); } return guid; } /* * Set the array id. */ void DATData::SetArrayId(AffymetrixGuidType& value) { ParameterNameValueType nvt; nvt.SetName(ARRAY_ID_PARAM_NAME); nvt.SetValueAscii(value, AFFY_GUID_LEN); GenericDataHeader *parentGDH = GetParentArrayGenericDataHeader(); parentGDH->AddNameValParam(nvt); } /* * Get the array barcode. */ std::wstring DATData::GetArrayBarcode() { std::wstring result; GenericDataHeader *parentGDH = GetParentArrayGenericDataHeader(); ParameterNameValueType nvt; if (parentGDH->FindNameValParam(ARRAY_BARCODE_PARAM_NAME, nvt)) { result = nvt.GetValueText(); } return result; } /* * Set the array barcode. 
*/ void DATData::SetArrayBarcode(std::wstring& value) { ParameterNameValueType nvt; nvt.SetName(ARRAY_BARCODE_PARAM_NAME); nvt.SetValueText(value, ARRAY_BARCODE_MAX_LEN); GenericDataHeader *parentGDH = GetParentArrayGenericDataHeader(); parentGDH->AddNameValParam(nvt); } float DATData::GetPixelSize() { float result = 0.0f; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(PIXEL_SIZE_PARAM_NAME, paramType)) { result = paramType.GetValueFloat(); } return result; } void DATData::SetPixelSize(float value) { ParameterNameValueType paramType; paramType.SetName(PIXEL_SIZE_PARAM_NAME); paramType.SetValueFloat(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } std::wstring DATData::GetScannerType() { return GetWStringFromGenericHdr(SCANNER_TYPE_PARAM_NAME); } void DATData::SetScannerType(const std::wstring& value) { SetWStringToGenericHdr(SCANNER_TYPE_PARAM_NAME, value); } std::wstring DATData::GetScannerID() { return GetWStringFromGenericHdr(SCANNER_ID_PARAM_NAME); } void DATData::SetScannerID(const std::wstring& value) { SetWStringToGenericHdr(SCANNER_ID_PARAM_NAME, value); } std::wstring DATData::GetExposureTime() { return GetWStringFromGenericHdr(PLATE_EXPOSURE_TIME); } void DATData::SetExposureTime(std::wstring& value) { SetWStringToGenericHdr(PLATE_EXPOSURE_TIME, value); } int32_t DATData::GetPegSubImageIndex() { return GetInt32FromGenericHdr(PLATE_PEG_SUBIMAGE_INDEX); } void DATData::SetPegSubImageIndex(int32_t value) { SetInt32ToGenericHdr(PLATE_PEG_SUBIMAGE_INDEX, value); } affymetrix_calvin_utilities::DateTime DATData::GetScanDate() { std::wstring dateString = GetWStringFromGenericHdr(SCAN_DATE_PARAM_NAME); DateTime dt = DateTime::Parse(dateString); return dt; } void DATData::SetScanDate(affymetrix_calvin_utilities::DateTime value) { std::wstring dateString = value.ToString(); SetWStringToGenericHdr(SCAN_DATE_PARAM_NAME, 
dateString); } int32_t DATData::GetRows() { if (cachedRows == -1) cachedRows = GetInt32FromGenericHdr(ROWS_PARAM_NAME); return cachedRows; } void DATData::SetRows(int32_t value) { SetInt32ToGenericHdr(ROWS_PARAM_NAME, value); cachedCols = value; } int32_t DATData::GetCols() { if (cachedCols == -1) cachedCols = GetInt32FromGenericHdr(COLS_PARAM_NAME); return cachedCols; } void DATData::SetCols(int32_t value) { SetInt32ToGenericHdr(COLS_PARAM_NAME, value); cachedCols = value; } void DATData::AddSubgrid(u_int32_t status, const FRegion& subgrid) { subgridsStatus.push_back(status); subgrids.push_back(subgrid); } std::wstring DATData::GetWStringFromGenericHdr(const std::wstring& name) { std::wstring result; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueText(); } return result; } void DATData::SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueText(value, reserve); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } int32_t DATData::GetInt32FromGenericHdr(const std::wstring& name) { int32_t result = 0; ParameterNameValueType paramType; GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); if (hdr->FindNameValParam(name, paramType)) { result = paramType.GetValueInt32(); } return result; } void DATData::SetInt32ToGenericHdr(const std::wstring& name, int32_t value) { ParameterNameValueType paramType; paramType.SetName(name); paramType.SetValueInt32(value); GenericDataHeader* hdr = genericData.Header().GetGenericDataHdr(); hdr->AddNameValParam(paramType); } /* * Fills the array with pixel values */ bool DATData::GetPixels(u_int16_t* pixelBuffer, u_int32_t startRow, u_int32_t rowCnt) { PreparePixelSet(); if (dsPixel && dsPixel->IsOpen()) { if (rowCnt+startRow <= 
(u_int32_t)GetRows()) { int32_t startIdx = startRow*GetCols(); int32_t count = rowCnt*GetCols(); if (startIdx+count <= dsPixel->Rows()) { int32_t returned = dsPixel->GetDataRaw(0, startIdx, count, pixelBuffer); return (count == returned); } } } return false; } /* * Fills the array with requested pixel values. */ bool DATData::GetPixels(u_int16_t* pixelBuffer, u_int32_t startRow, u_int32_t startCol, u_int32_t rowCnt, u_int32_t colCnt) { PrepareSmallMemoryPixelSet(); if (dsSMPixel && dsSMPixel->IsOpen()) { if (rowCnt+startRow <= (u_int32_t)GetRows() && colCnt+startCol <= (u_int32_t)GetCols()) { u_int16_t* pb = pixelBuffer; // Copy one row at a time into the buffer. for (int32_t row = startRow; row < startRow+rowCnt; ++row) { int32_t dsRow = row*GetCols() + startCol; if (dsRow+colCnt <= dsSMPixel->Rows()) { int32_t returned = dsSMPixel->GetDataRaw(0, dsRow, colCnt, pb); if (returned != colCnt) return false; pb += colCnt; } } return true; } } return false; } bool DATData::GetRange(u_int16_t& min, u_int16_t& max) { PrepareStatsSet(); if (dsStats && dsStats->IsOpen()) { if (dsStats->Rows() > 0) { dsStats->GetData(0, 0, min); dsStats->GetData(0, 1, max); return true; } } return false; } void DATData::PreparePixelSet() { PrepareSet(DAT_PIXEL, dsPixel); } void DATData::PrepareSmallMemoryPixelSet() { genericData.UseMemoryMapping(false); genericData.LoadEntireDataSetHint(false); PrepareSet(DAT_PIXEL, dsSMPixel); genericData.UseMemoryMapping(true); } void DATData::PrepareStatsSet() { PrepareSet(DAT_STATS, dsStats); } void DATData::PrepareSet(const std::wstring& name, DataSet*& ds) { if (ds == 0) { ds = genericData.DataSet(DAT_DATAGROUP, name); if (ds) ds->Open(); } } GenericDataHeader* DATData::GetParentArrayGenericDataHeader() { GenericDataHeader* parentGDH = genericData.Header().GetGenericDataHdr()->FindParent(ARRAY_TYPE_IDENTIFIER); if (parentGDH == 0) { // Create a new parent GenericDataHeader and add to the current GenericDataHeader GenericDataHeader gdh; 
gdh.SetFileTypeId(ARRAY_TYPE_IDENTIFIER); genericData.Header().GetGenericDataHdr()->AddParent(gdh); parentGDH = genericData.Header().GetGenericDataHdr()->FindParent(ARRAY_TYPE_IDENTIFIER); } return parentGDH; } /* * Add a grid alignment algorithm parameter. */ void DATData::AddGridAlignmentAlgorithmParameter(const ParameterNameValueType& nvt) { gridAlignParams.push_back(nvt); } /* * Remove all grid alignment algorithm parameters. */ void DATData::ClearGridAlignmentAlgorithmParameters() { gridAlignParams.clear(); } /* * Return a grid alignment algorithm parameter given a name. */ bool DATData::FindGridAlignmentAlgorithmParameter(const std::wstring& name, ParameterNameValueType& param) { for (ParameterNameValueTypeIt ii = gridAlignParams.begin(); ii != gridAlignParams.end(); ++ii) { if (name == ii->GetName()) { param = *ii; return true; } } return false; } /* * Return the grid alignment algorithm parameters. */ void DATData::GetGridAlignmentAlgorithmParameters(ParameterNameValueTypeVector& algParams) { algParams = gridAlignParams; } affxparser/src/fusion/calvin_files/data/src/DATData.h0000644000175200017520000002625714516003651023555 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DATData_HEADER_ #define _DATData_HEADER_ /*! \file DATData.h This file defines a data container class for DAT data. */ #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/Coords.h" // #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif using namespace affymetrix_calvin_utilities; namespace affymetrix_calvin_io { #define DAT_DATAGROUP std::wstring(L"Default Cube") #define DAT_PIXEL std::wstring(L"Pixel") #define DAT_STATS std::wstring(L"Stats") #define DAT_GLOBAL_GRID std::wstring(L"GlobalGrid") #define DAT_SUBGRID std::wstring(L"SubGrid") /*! This class is a data container object for DAT data. */ class DATData { public: /*! Constructor */ DATData(); /*! Constructor * @param filename Name of the DAT file */ DATData(const std::string &filename); /*! Destructor */ ~DATData(); /*! The grid and subgrid status flags. May be combined in an OR fashion. */ enum GridStatus { GridOK = 0x1, GridError = 0x2, GridManualAdjust = 0x4 }; private: /*! generic file object */ GenericData genericData; bool setPixelMetaData; bool setStatsMetaData; /*! keep rows from being read from the header all the time */ int32_t cachedRows; /*! keep cols from being read from the header all the time */ int32_t cachedCols; /*! Pixel intensity DataSet */ DataSet* dsPixel; /*! Small memory pixel DataSet */ DataSet* dsSMPixel; /*! Stats DataSet */ DataSet* dsStats; /*! Global grid */ FRegion globalGrid; /*! 
Global grid status flag - indicates the state of the global grid */ u_int32_t globalGridStatus; /*! Subgrids */ FRegionVector subgrids; /*! Subgrids status flag - indicates the state of each subgrid */ Uint32Vector subgridsStatus; /*! Grid Alignment Algorithm Parameters */ ParameterNameValueTypeVector gridAlignParams; public: /*! Clear the class members */ void Clear(); /*! Set the file name. * @param p Name of the file. */ void SetFilename(const std::string &p); /*! Get the file name. * @return Returns the file name. */ std::string GetFilename(); /*! Set the total number of pixels in the DAT. * @param ln Number of pixels. */ void SetPixelCount(int32_t ln); /*! Set the total number of statistic rows in the DAT. * @param ln Number of rows of statistic rows. */ void SetStatsCount(int32_t ln); /*! Get the FileHeader. * @return Returns a pointer to the FileHeader. */ FileHeader* GetFileHeader() { return &genericData.Header(); } /*! Get the GenericData object. * @return Returns a reference to the GenericData object. */ GenericData& GetGenericData() { return genericData; } // should be a friend method only /*! Get the array type. * @return Returns the name of the array type of the DAT */ std::wstring GetArrayType(); /*! Set the array type. * @param value The name of the DAT array type. */ void SetArrayType(const std::wstring& value); /*! Get the array id. * @return The array id. */ affymetrix_calvin_utilities::AffymetrixGuidType GetArrayId(); /*! Set the array id. * @param value The array id. */ void SetArrayId(affymetrix_calvin_utilities::AffymetrixGuidType& value); /*! Get the array barcode. * @return The barcode. */ std::wstring GetArrayBarcode(); /*! Set the array barcode. * @param value The barcode. */ void SetArrayBarcode(std::wstring& value); /*! Get the pixel size. * @return Returns the pixel size in microns. */ float GetPixelSize(); /*! Set the pixel size. * @param value The pixel size in microns. */ void SetPixelSize(float value); /*! 
Get the type of scanner used to generate the DAT. * @return Returns the type of scanner used to generate the DAT. */ std::wstring GetScannerType(); /*! Set the type of scanner used to generate the DAT. * @param value The type of scanner used to generate the DAT. */ void SetScannerType(const std::wstring& value); /*! Get the ID of the scanner used to generate the DAT. * @return Returns the ID of the scanner used to generate the DAT. */ std::wstring GetScannerID(); /*! Set the ID of the scanner used to generate the DAT. * @param value The ID of the scanner used to generate the DAT. */ void SetScannerID(const std::wstring& value); std::wstring GetExposureTime(); void SetExposureTime(std::wstring& value); /*! Get the index of the subimage of the peg. * @return Return the index of the subimage of the peg. */ int32_t GetPegSubImageIndex(); /*! Set the index of the subimage of the peg. * @return void and set the subimage of the peg to the DAT file. */ void SetPegSubImageIndex(int32_t value); /*! Get the date of the scan. * @return Returns the date of the scan. */ affymetrix_calvin_utilities::DateTime GetScanDate(); /*! Set the date of the scan. * @param value The date of the scan. */ void SetScanDate(affymetrix_calvin_utilities::DateTime value); /*! Get the number of rows of pixels in the DAT. * @return Returns the number of pixel rows in the DAT. */ int32_t GetRows(); /*! Set the number of rows of pixels in the DAT. * @param value The number of pixel rows in the DAT. */ void SetRows(int32_t value); /*! Get the number of columns of pixels in the DAT. * @return Returns the number of pixel columns in the DAT. */ int32_t GetCols(); /*! Set the number of colums of pixels in the DAT. * @param value The number of pixel columns in the DAT. */ void SetCols(int32_t value); /*! 
Fills the array with pixel values * @param pixelBuffer Pointer to the buffer to receive pixel intensities * @param startRow The row from which to start copying pixel intensities * @param rowCnt The number of rows of pixel intensities to copy to the buffer * @return true if the method succeeded */ bool GetPixels(u_int16_t* pixelBuffer, u_int32_t startRow, u_int32_t rowCnt); /*! Fills the array with requested pixel values. Uses a small-memory footprint access. Good for retrieving small parts of the image. * @param pixelBuffer Pointer to the buffer to receive the pixel intensities * @param startRow The row from which to start copying pixel intensities * @param startCol The col from which to start copying pixel intensities * @param rowCnt The number of rows of pixel intensities to copy to the buffer * @param colCnt The number of columns of pixel intensities to copy to the buffer * @return true if the method succeeded */ bool GetPixels(u_int16_t* pixelBuffer, u_int32_t startRow, u_int32_t startCol, u_int32_t rowCnt, u_int32_t colCnt); /*! Gets the range of pixel intensities. * @param min Reference to a u_int16_t to receive the min pixel intensity * @param max Reference to a u_int16_t to receive the max pixel intensity * @return true if the max and min intensities can be returned. */ bool GetRange(u_int16_t& min, u_int16_t& max); /*! Get the global grid positions. Use FGridCoords::FGridCoords(FRegion&) cast constructor to convert to FGridCoords. * @return Global grid position. */ FRegion GetGlobalGrid() const { return globalGrid; } /*! Get the global grid status. * @return Returns the status of the global grid. Can be a combination of GridStatus values. */ u_int32_t GetGlobalGridStatus() const { return globalGridStatus; } /*! Has global grid data been set. * @return True if there is global grid data. */ bool HasGlobalGridData() const { return globalGrid.pts.size() > 0; } /*! Set the global grid positions. 
Use FRegion::FRegion(FGridCoords&) cast constructor to convert from FGridCoords. * @param gridStatus A combination of the GridStatus flags indicating the status of the global grid. * @param grid Global grid position */ void SetGlobalGrid(u_int32_t gridStatus, FRegion& grid){ globalGridStatus = gridStatus, globalGrid = grid; } /*! Get the number of subgrids. * @return Number of subgrids */ int32_t GetSubgridCnt()const { return (int32_t)subgrids.size(); } /*! Return a subgrid by index. * @param index Index of the subgrid to get * @return A region defining the subgrid. */ FRegion GetSubgrid(int32_t index)const { return subgrids.at(index); } /*! Get the status of a subgrid. * @param index Index of the subgrid status to get. * @return Returns the status of the subgrid. Can be a combination of GridStatus values. */ u_int32_t GetSubgridStatus(int32_t index) const { return subgridsStatus.at(index); } /*! Add a subgrid. * @param status A combination of the GridStatus flags indicating the status of the subgrid grid. * @param subgrid Subgrid region to be added. */ void AddSubgrid(u_int32_t status, const FRegion& subgrid); /*! Clear subgrids. */ void ClearSubgrids(){ subgrids.clear(); subgridsStatus.clear(); } /*! Add a grid alignment algorithm parameter. * If the parameter already exists, it will be overwritten. * @param nvt Algorithm parameter to add to the list. */ void AddGridAlignmentAlgorithmParameter(const ParameterNameValueType& nvt); /*! Remove all grid alignment algorithm parameters. */ void ClearGridAlignmentAlgorithmParameters(); /*! Return a grid alignment algorithm parameter given a name. * @param name Name of the grid alignment algorithm parameter to find. * @param param The found parameter. * @return True if the parameter was found. */ bool FindGridAlignmentAlgorithmParameter(const std::wstring& name, ParameterNameValueType& param); /*! Return the grid alignment algorithm parameters. * @param algParams Vector with grid alignment algorithm parameters. 
*/ void GetGridAlignmentAlgorithmParameters(ParameterNameValueTypeVector& algParams); /*! Get a pointer to the parent array GenericDataHeader object. * @return A pointer to the parent array GenericDataHeader. Never null. */ GenericDataHeader* GetParentArrayGenericDataHeader(); private: void InsertDataSetHeader(const DataSetHeader &hdr); void UpdateDataSetRowCount(const DataSetHeader &hdr); std::wstring GetWStringFromGenericHdr(const std::wstring& name); void SetWStringToGenericHdr(const std::wstring& name, const std::wstring value, int32_t reserve=-1); int32_t GetInt32FromGenericHdr(const std::wstring& name); void SetInt32ToGenericHdr(const std::wstring& name, int32_t value); void PreparePixelSet(); void PrepareSmallMemoryPixelSet(); void PrepareStatsSet(); void PrepareSet(const std::wstring& name, DataSet*& ds); }; } #endif // _FileHeader_HEADER_ affxparser/src/fusion/calvin_files/data/src/DataException.cpp0000644000175200017520000000457714516003651025437 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/DataException.h" // namespace affymetrix_calvin_exceptions { const std::wstring DataSetNotOpenException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::DataSetNotOpenException thrown."; } const std::wstring ColumnIndexOutOfBoundsException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::ColumnIndexOutOfBoundsException thrown."; } const std::wstring RowIndexOutOfBoundsException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::RowIndexOutOfBoundsException thrown."; } const std::wstring UnexpectedColumnTypeException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::UnexpectedColumnTypeException thrown."; } const std::wstring DataGroupNotFoundException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::DataGroupNotFoundException thrown."; } const std::wstring DataSetNotFoundException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::DataSetNotFoundException thrown."; } const std::wstring ProbeSetNotFoundException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::ProbeSetNotFoundException thrown."; } const std::wstring CDFAccessNotSupportedByModeException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::CDFAccessNotSupportedByModeException thrown."; } const std::wstring DataSetRemapException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::DataSetRemapException thrown."; } } affxparser/src/fusion/calvin_files/data/src/DataException.h0000644000175200017520000001162514516003651025074 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 
Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataException_HEADER_ #define _DataException_HEADER_ /*! \file DataException.h This file provides class definition for the data exceptions. */ #include "calvin_files/exception/src/ExceptionBase.h" // namespace affymetrix_calvin_exceptions { class DataSetNotOpenException : public CalvinException { public: DataSetNotOpenException() : CalvinException() {} DataSetNotOpenException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class ColumnIndexOutOfBoundsException : public CalvinException { public: ColumnIndexOutOfBoundsException() : CalvinException() {} ColumnIndexOutOfBoundsException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class RowIndexOutOfBoundsException : public CalvinException { public: RowIndexOutOfBoundsException() : CalvinException() {} 
RowIndexOutOfBoundsException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class UnexpectedColumnTypeException : public CalvinException { public: UnexpectedColumnTypeException() : CalvinException() {} UnexpectedColumnTypeException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class DataGroupNotFoundException : public CalvinException { public: DataGroupNotFoundException() : CalvinException() {} DataGroupNotFoundException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class DataSetNotFoundException : public CalvinException { public: DataSetNotFoundException() : CalvinException() {} DataSetNotFoundException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class ProbeSetNotFoundException : public CalvinException { public: ProbeSetNotFoundException() : CalvinException() {} ProbeSetNotFoundException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class CDFAccessNotSupportedByModeException : 
public CalvinException { public: CDFAccessNotSupportedByModeException() : CalvinException() {} CDFAccessNotSupportedByModeException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class DataSetRemapException : public CalvinException { public: DataSetRemapException() : CalvinException() {} DataSetRemapException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; } #endif // _DataException_HEADER_ affxparser/src/fusion/calvin_files/data/src/DataGroup.cpp0000644000175200017520000000721614516003651024566 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/DataGroup.h" // #include "calvin_files/data/src/GenericData.h" // using namespace affymetrix_calvin_io; #ifndef _MSC_VER #include #endif /* * Initialize the object to use memory-mapping to access the file. */ DataGroup::DataGroup(const std::string& filename_, const DataGroupHeader& dch, void* handle_, bool loadEntireDataSetHint_) : filename(filename_), dataGroupHeader(dch), useMemoryMapping(true), handle(handle_), fileStream(0), loadEntireDataSetHint(loadEntireDataSetHint_) { } /* * Initialize the object to use std::ifstream to access */ DataGroup::DataGroup(const std::string& filename_, const DataGroupHeader& dch, std::ifstream& ifs, bool loadEntireDataSetHint_) : filename(filename_), dataGroupHeader(dch), useMemoryMapping(false), handle(0), fileStream(&ifs), loadEntireDataSetHint(loadEntireDataSetHint_) { } /*! Returns a pointer to the DataSet object by DataSet index. * Each call will return a new DataSet object. * The caller should call Delete when finished with the DataSet. * * @param dataSetIdx The index of the DataSet to return. * @return DataSet * @exception affymetrix_calvin_exceptions::DataSetNotFoundException DataSet not found. 
*/ affymetrix_calvin_io::DataSet* DataGroup::DataSet(u_int32_t dataSetIdx) { DataSetHeader* dph = GenericData::FindDataSetHeader(&dataGroupHeader, dataSetIdx); if (dph) { if (useMemoryMapping) return new affymetrix_calvin_io::DataSet(filename, *dph, handle, loadEntireDataSetHint); else return new affymetrix_calvin_io::DataSet(filename, *dph, *fileStream, loadEntireDataSetHint); } else { affymetrix_calvin_exceptions::DataSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /*! Returns a pointer to the DataSet object by DataSet name. * Each call will return a new DataSet object. * The caller should call Delete when finished with the DataSet. * * @param dataSetName The name of the DataSet to return. * @return DataSet * @exception affymetrix_calvin_exceptions::DataSetNotFoundException DataSet not found. */ affymetrix_calvin_io::DataSet* DataGroup::DataSet(const std::wstring& dataSetName) { DataSetHeader* dph = GenericData::FindDataSetHeader(&dataGroupHeader, dataSetName); if (dph) { if (useMemoryMapping) return new affymetrix_calvin_io::DataSet(filename, *dph, handle, loadEntireDataSetHint); else return new affymetrix_calvin_io::DataSet(filename, *dph, *fileStream, loadEntireDataSetHint); } else { affymetrix_calvin_exceptions::DataSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } affxparser/src/fusion/calvin_files/data/src/DataGroup.h0000644000175200017520000001022314516003651024223 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataGroup_HEADER_ #define _DataGroup_HEADER_ /*! \file DataGroup.h This file provides access to the DataSets in a DataGroup. */ #ifdef _MSC_VER #include #endif #include "calvin_files/data/src/DataException.h" #include "calvin_files/data/src/DataGroupHeader.h" #include "calvin_files/data/src/DataSet.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include // namespace affymetrix_calvin_io { // forward declare class GenericData; /*! This class provides methods to get a DataSet in a DataGroup. */ class DataGroup { public: /*! Constructor * @param filename The name of the generic file to access. * @param dch The DataGroupHeader of the DataGroup to access. * @param handle A handle to the file mapping object * @param loadEntireDataSetHint Indicates if DataSets created by DataGroup will attempt to read the entire DataSet data into a memory buffer. */ DataGroup(const std::string& filename, const DataGroupHeader& dch, void* handle, bool loadEntireDataSetHint=false); /*! Constructor * @param filename The name of the generic file to access. * @param dch The DataGroupHeader of the DataGroup to access. * @param ifs An open ifstream object. 
* @param loadEntireDataSetHint Indicates if DataSets created by DataGroup will attempt to read the entire DataSet data into a memory buffer. */ DataGroup(const std::string& filename, const DataGroupHeader& dch, std::ifstream& ifs, bool loadEntireDataSetHint=false); /*! Method to get a reference to the DataGroupHeader * @return A reference to the DataGroupHeader. */ const affymetrix_calvin_io::DataGroupHeader& Header() { return dataGroupHeader; } /*! Returns a pointer to the DataSet object by DataSet index. * Each call will return a new DataSet object. * The caller should call Delete when finished with the DataSet. * * @param dataSetIdx The index of the DataSet to return. * @return DataSet * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. * @exception affymetrix_calvin_exceptions::DataSetNotFoundException DataSet not found. */ affymetrix_calvin_io::DataSet* DataSet(u_int32_t dataSetIdx); /*! Returns a pointer to the DataSet object by DataSet name. * Each call will return a new DataSet object. * The caller should call Delete when finished with the DataSet. * * @param dataSetName The name of the DataSet to return. * @return DataSet * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. * @exception affymetrix_calvin_exceptions::DataSetNotFoundException DataSet not found. */ affymetrix_calvin_io::DataSet* DataSet(const std::wstring& dataSetName); protected: /*! Name of the generic file to access */ std::string filename; /*! DataGroupHeader of the DataGroup from which to get DataSets*/ DataGroupHeader dataGroupHeader; /*! A flag the indicates the data access mode. True = access the data using memory-mapping. False = access the data using std::ifstream */ bool useMemoryMapping; /*! File mapping object handle */ void* handle; /*! An open ifstream object */ std::ifstream* fileStream; /*! Indicates whether DataSets created by DataGroup should attempt to read all data into a memory buffer. 
*/ bool loadEntireDataSetHint; }; } #endif //_DataGroup_HEADER_ affxparser/src/fusion/calvin_files/data/src/DataGroupHeader.cpp0000644000175200017520000000504514516003651025675 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/DataGroupHeader.h" // #include "calvin_files/data/src/FileHeader.h" // using namespace affymetrix_calvin_io; DataGroupHeader::DataGroupHeader() { dataSetPos = 0; nextGrpPos = 0; } DataGroupHeader::DataGroupHeader(const std::wstring &n) { name = n; dataSetPos = 0; nextGrpPos = 0; headerStartFilePos = 0; } DataGroupHeader::~DataGroupHeader() { Clear(); } void DataGroupHeader::Clear() { name.clear(); dataSetPos = 0; nextGrpPos = 0; dataSetHdrs.clear(); } void DataGroupHeader::SetName(const std::wstring &p) { name = p; } std::wstring DataGroupHeader::GetName() const { return name; } int32_t DataGroupHeader::GetDataSetCnt() const { return (int32_t)dataSetHdrs.size(); } void DataGroupHeader::AddDataSetHdr(const DataSetHeader &p) { dataSetHdrs.push_back(p); } void DataGroupHeader::ReplaceDataSetHdr(const DataSetHeader &p) { for(int i = 0 ; i < dataSetHdrs.size(); i++) { if(dataSetHdrs[i].GetName() == 
p.GetName()) { dataSetHdrs[i] = p; return; } } } DataSetHeader& DataGroupHeader::GetDataSet(int32_t index) { return dataSetHdrs[index]; } const DataSetHeader& DataGroupHeader::GetDataSetConst(int32_t index) const { return dataSetHdrs[index]; } void DataGroupHeader::GetDataSetIterators(DataSetHdrIt &begin, DataSetHdrIt &end) { begin = dataSetHdrs.begin(); end = dataSetHdrs.end(); } /* * Find a DataSetHeader given DataSet name */ DataSetHeader* DataGroupHeader::FindDataSetHeader(const std::wstring& dataSetName) { DataSetHeader* dph = 0; DataSetHdrIt begin; DataSetHdrIt end; GetDataSetIterators(begin,end); for (DataSetHdrIt ii=begin; ii!=end; ++ii) { if (ii->GetName() == dataSetName) { dph = &(*ii); break; } } return dph; } affxparser/src/fusion/calvin_files/data/src/DataGroupHeader.h0000644000175200017520000000705214516003651025342 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataGroupHeader_HEADER_ #define _DataGroupHeader_HEADER_ #include "calvin_files/data/src/DataSetHeader.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { class DataGroupHeader { public: DataGroupHeader(); DataGroupHeader(const std::wstring &n); ~DataGroupHeader(); private: /*! data dataGroup name */ std::wstring name; /*! file position of the 1st data dataSet */ u_int32_t dataSetPos; /*! file position of the next dataGroup */ u_int32_t nextGrpPos; /*! file position of the start of the data group header */ u_int32_t headerStartFilePos; /*! data dataSets in this dataGroup */ DataSetHdrVector dataSetHdrs; public: /*! */ void Clear(); /*! */ void SetName(const std::wstring &p); /*! */ std::wstring GetName() const; /*! Get the data set count */ int32_t GetDataSetCnt() const; /*! */ void AddDataSetHdr(const DataSetHeader &p); /*!Replace a data set header with a new data set header of the same name. * @param new data set header with the same name as the header it replaces. */ void ReplaceDataSetHdr(const DataSetHeader &p); /*! */ DataSetHeader& GetDataSet(int32_t index); /*! */ const DataSetHeader& GetDataSetConst(int32_t index) const; /*! */ void GetDataSetIterators(DataSetHdrIt &begin, DataSetHdrIt &end); /*! Set the file position of the start of the DataSet header. * The value set here is not written to the file. */ void SetHeaderStartFilePos(u_int32_t pos) { headerStartFilePos = pos; } /*! Get the file position of the start of the DataSet header. */ u_int32_t GetHeaderStartFilePos() const { return headerStartFilePos; } /*! Set the file position of the DataSet header. * The value set here is not necessarily the value written to the file. */ void SetDataSetPos(u_int32_t pos) { dataSetPos = pos; } /*! Get the file position of the DataSet header. */ u_int32_t GetDataSetPos() const { return dataSetPos; } /*! Set the file position of the next DataGroup header. */ void SetNextGroupPos(u_int32_t pos) { nextGrpPos = pos; } /*! Get the file position of the next DataGroup header. */ u_int32_t GetNextGroupPos() const { return nextGrpPos; } /*! 
*/ affymetrix_calvin_io::DataSetHeader* FindDataSetHeader(const std::wstring& dataSetName); }; /*! vector of DataGroupHeaders */ typedef std::vector DataGroupHdrVector; /*! constant iterator of DataGroupHeaders */ typedef std::vector::iterator DataGroupHdrIt; /*! constant iterator of DataGroupHeaders */ typedef std::vector::const_iterator DataGroupHdrConstIt; } #endif // _DataGroupHeader_HEADER_ affxparser/src/fusion/calvin_files/data/src/DataSet.cpp0000644000175200017520000004537714516003651024237 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/DataSet.h" // #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parsers/src/FileInput.h" // #include "util/Fs.h" // #include #include // using namespace affymetrix_calvin_io; #ifndef _MSC_VER #include #include #ifndef PAGE_SIZE /// Page size used for memory mapping in non Windows environment #define PAGE_SIZE (getpagesize()) #endif #ifndef PAGE_MASK /// Page mask used for memory mapping in non Windows environment #define PAGE_MASK ~(PAGE_SIZE-1) #endif #ifndef PAGE_TRUNC /// Page truncation pointer used for memory mapping in non Windows environment #define PAGE_TRUNC(ptr) (ptr&(PAGE_MASK)) #endif #endif /* * Initialize the object to use memory-mapping to access the file. */ DataSet::DataSet(const std::string& fileName_, const DataSetHeader& header_, void* handle, bool loadEntireDataSetHint_) { fileName = fileName_; header = header_; mappedData = 0; data = 0; isOpen = false; #ifdef _MSC_VER fileMapHandle = handle; #else fp = 0; #endif mapStart = 0; mapLen = 0; fileStream = 0; useMemoryMapping = true; loadEntireDataSetHint = loadEntireDataSetHint_; } /* * Initialize the object to use std::ifstream to access the file. */ DataSet::DataSet(const std::string& fileName_, const affymetrix_calvin_io::DataSetHeader& header_, std::ifstream& ifs, bool loadEntireDataSetHint_) { fileName = fileName_; header = header_; mappedData = 0; data = 0; isOpen = false; #ifdef _MSC_VER fileMapHandle = 0; #else fp = 0; #endif mapStart = 0; mapLen = 0; fileStream = &ifs; useMemoryMapping = false; loadEntireDataSetHint = loadEntireDataSetHint_; } /* * Clean up. 
*/ DataSet::~DataSet() { Close(); } /* * Informs the object to delete itself */ void DataSet::Delete() { Close(); delete this; } /* * Open the DataSet for reading */ bool DataSet::Open() { UpdateColumnByteOffsets(); if (useMemoryMapping) isOpen = OpenMM(); else { ReadDataSetUsingStream(); isOpen = true; } return isOpen; } /* * Open the file using memory-mapping */ bool DataSet::OpenMM() { #ifdef _MSC_VER if (MapDataWin32(header.GetDataStartFilePos(), header.GetDataSize()) == false) return false; #else // Open the file fp = fopen(fileName.c_str(), "r"); if (fp == NULL) { return false; } if (MapDataPosix(header.GetDataStartFilePos(), header.GetDataSize()) == false) return false; #endif return true; } /* * Reads the DataSet data from the file into a memory buffer. */ void DataSet::ReadDataSetUsingStream() { if(loadEntireDataSetHint == false) return; mapLen = header.GetDataSize(); mapStart = header.GetDataStartFilePos(); data = new char[mapLen]; fileStream->seekg(mapStart); fileStream->read(data, mapLen); } /* * Close the DataSet */ void DataSet::Close() { if (useMemoryMapping) UnmapFile(); else ClearStreamData(); } #ifdef _MSC_VER std::string GetErrorMsg() { LPVOID lpMsgBuf; if (!FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language (LPTSTR) &lpMsgBuf, 0, NULL )) { // Handle the error. return ""; } std::string message = (char*)lpMsgBuf; // Free the buffer. 
LocalFree( lpMsgBuf ); return message; } /* * Map the data on Win32 */ bool DataSet::MapDataWin32(u_int32_t start, u_int32_t bytes) { mapStart = start; if (bytes > MaxViewSize) { bytes = MaxViewSize; // limit the amount of data mapped } SYSTEM_INFO sysinfo; GetSystemInfo (&sysinfo); u_int64_t qwFileOffset = u_int64_t(start); DWORD dwOffset = DWORD(qwFileOffset % sysinfo.dwAllocationGranularity); qwFileOffset = (qwFileOffset / sysinfo.dwAllocationGranularity) * sysinfo.dwAllocationGranularity; DWORD dwOffsetHigh = DWORD(qwFileOffset >> 32); DWORD dwOffsetLow = DWORD(qwFileOffset & 0xFFFFFFFF); DWORD dwBytesToMap = bytes + dwOffset; if (mappedData != 0) { UnmapViewOfFile (mappedData); } mappedData = MapViewOfFile(fileMapHandle, FILE_MAP_READ, dwOffsetHigh, dwOffsetLow, dwBytesToMap); if (mappedData == 0) { std::string msg = GetErrorMsg(); data = 0; fileMapHandle = NULL; mapStart = 0; return false; } mapLen = bytes; data = (char *)mappedData + dwOffset; return true; } #else /* * Map the data on Linux */ bool DataSet::MapDataPosix(u_int32_t start, u_int32_t bytes) { mapStart = start; if (fp == NULL) return false; u_int32_t page_start = PAGE_TRUNC(start); u_int32_t page_offset = start - page_start; mapLen = bytes + page_offset; // Get the file size if (Fs::fileExists(fileName)) { int64_t fileLen = Fs::fileSize(fileName); if (fileLen < page_start + mapLen) mapLen = fileLen - page_start; } // Map the file. 
mappedData = mmap(NULL, mapLen, PROT_READ, MAP_SHARED, fileno(fp), page_start); if (mappedData == MAP_FAILED) { Close(); return false; } else { data = ((char *)mappedData) + page_offset; } return true; } #endif /* * Close the memory-map */ void DataSet::UnmapFile() { #ifdef _MSC_VER // Unmap the view if (mappedData != 0 ) { UnmapViewOfFile(mappedData); mappedData = 0; } fileMapHandle = NULL; data = 0; mapStart = 0; mapLen = 0; #else if (fp != NULL) { if (mappedData) { munmap(mappedData, mapLen); mapLen = 0; mappedData = 0; } fclose(fp); fp = NULL; } #endif } /* * Delete the buffer */ void DataSet::ClearStreamData() { delete[] data; data = 0; mapStart = 0; mapLen = 0; } /* * Check the row, column and expected column type */ void DataSet::CheckRowColumnAndType(int32_t row, int32_t col, DataSetColumnTypes type) { if (isOpen == false) { affymetrix_calvin_exceptions::DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (col < 0 || col >= header.GetColumnCnt()) { affymetrix_calvin_exceptions::ColumnIndexOutOfBoundsException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } if (row < 0 || row >= header.GetRowCnt()) { affymetrix_calvin_exceptions::RowIndexOutOfBoundsException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } // Check if the data type is expected if (header.GetColumnInfo(col).GetColumnType() != type) { affymetrix_calvin_exceptions::UnexpectedColumnTypeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Detemine 
the address of data given row and col. Ensure all requested data is mapped */ char* DataSet::FilePosition(int32_t rowStart, int32_t col, int32_t rowCount) { if (isOpen == false) { affymetrix_calvin_exceptions::DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } // Limit row count if (rowCount > header.GetRowCnt()) rowCount= header.GetRowCnt(); if (useMemoryMapping == false && loadEntireDataSetHint == false) { return LoadDataAndReturnFilePosition(rowStart, col, rowCount); } // Byte offset in data set + byte offset of data set in file u_int32_t startByte = BytesPerRow()*rowStart + columnByteOffsets[col] + header.GetDataStartFilePos(); #ifdef _MSC_VER if (useMemoryMapping) { // Byte offset in data set + byte offset of data set in file u_int32_t endByte = BytesPerRow()*(rowStart+rowCount-1) + columnByteOffsets[col+1] + header.GetDataStartFilePos(); // as long as col is in bounds this is safe. // Remap the file if necessary if (startByte < mapStart || endByte > mapStart+mapLen) { if (startByte < mapStart) // moving backwards through the data, attempt to find an optimum startByte. 
{ u_int32_t reverseStartByte = 0; if (endByte > MaxViewSize) reverseStartByte = endByte - MaxViewSize; // Don't go above the DataSet data if (reverseStartByte < header.GetDataStartFilePos()) reverseStartByte = header.GetDataStartFilePos(); if (MapDataWin32(reverseStartByte, header.GetDataStartFilePos() + header.GetDataSize() - reverseStartByte) == false) { affymetrix_calvin_exceptions::DataSetRemapException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } else // forward { if (MapDataWin32(startByte, header.GetDataStartFilePos() + header.GetDataSize() - startByte) == false) { affymetrix_calvin_exceptions::DataSetRemapException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } } } #endif char* filePosition = data + (startByte-mapStart); return filePosition; } /* * */ char* DataSet::LoadDataAndReturnFilePosition(int32_t rowStart, int32_t col, int32_t rowCount) { // Delete the previous data ClearStreamData(); mapLen = BytesPerRow()*rowCount; mapStart = BytesPerRow()*rowStart + columnByteOffsets[col] + header.GetDataStartFilePos(); data = new char[mapLen]; fileStream->seekg(mapStart); fileStream->read(data, mapLen); return data; } /* * Update the column sizes */ void DataSet::UpdateColumnByteOffsets() { columnByteOffsets.clear(); int32_t accum = 0; int32_t cols = header.GetColumnCnt(); for (int32_t col = 0; col < cols; ++col) { columnByteOffsets.push_back(accum); accum += header.GetColumnInfo(col).GetSize(); } columnByteOffsets.push_back(accum); } void DataSet::GetData(int32_t row, int32_t col, u_int8_t& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadUInt8(instr); } void DataSet::GetData(int32_t row, int32_t col, int8_t& value) { // Get the data char* instr = 
FilePosition(row, col); value = FileInput::ReadInt8(instr); } void DataSet::GetData(int32_t row, int32_t col, u_int16_t& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadUInt16(instr); } void DataSet::GetData(int32_t row, int32_t col, int16_t& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadInt16(instr); } void DataSet::GetData(int32_t row, int32_t col, u_int32_t& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadUInt32(instr); } void DataSet::GetData(int32_t row, int32_t col, int32_t& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadInt32(instr); } void DataSet::GetData(int32_t row, int32_t col, float& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadFloat(instr); } void DataSet::GetData(int32_t row, int32_t col, std::string& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadString8(instr); } void DataSet::GetData(int32_t row, int32_t col, std::wstring& value) { // Get the data char* instr = FilePosition(row, col); value = FileInput::ReadString16(instr); } int32_t DataSet::ComputeEndRow(int32_t startRow, int32_t count) { int32_t rows = startRow + count; if (count == -1 || (rows > header.GetRowCnt())) rows = header.GetRowCnt(); return rows; } template void DataSet::ClearAndSizeVector(std::vector& values, u_int32_t size) { values.clear(); values.resize(size); } template void DataSet::GetDataT(int32_t col, int32_t startRow, int32_t count, T& values) { int32_t endRow = ComputeEndRow(startRow, count); ClearAndSizeVector(values, endRow-startRow); if (header.GetColumnCnt() > 1) { for (int32_t row = startRow; row < endRow; ++row) { // Get the data char* instr = FilePosition(row, col); AssignValue(row-startRow, values, instr); } } else { char* instr = FilePosition(startRow, col, count); int32_t recomputePositionRow = LastRowMapped(); for (int32_t row = 
startRow; row < endRow; ++row) { if (row > recomputePositionRow) { instr = FilePosition(row, col, count-row); recomputePositionRow = LastRowMapped(); } AssignValue(row-startRow, values, instr); } } } void DataSet::AssignValue(int32_t index, Uint8Vector& values, char*& instr) { values[index] = FileInput::ReadUInt8(instr); } void DataSet::AssignValue(int32_t index, Int8Vector& values, char*& instr) { values[index] = FileInput::ReadInt8(instr); } void DataSet::AssignValue(int32_t index, Uint16Vector& values, char*& instr) { values[index] = FileInput::ReadUInt16(instr); } void DataSet::AssignValue(int32_t index, Int16Vector& values, char*& instr) { values[index] = FileInput::ReadInt16(instr); } void DataSet::AssignValue(int32_t index, Uint32Vector& values, char*& instr) { values[index] = FileInput::ReadUInt32(instr); } void DataSet::AssignValue(int32_t index, Int32Vector& values, char*& instr) { values[index] = FileInput::ReadInt32(instr); } void DataSet::AssignValue(int32_t index, FloatVector& values, char*& instr) { values[index] = FileInput::ReadFloat(instr); } void DataSet::AssignValue(int32_t index, StringVector& values, char*& instr) { values[index] = FileInput::ReadString8(instr); } void DataSet::AssignValue(int32_t index, WStringVector& values, char*& instr) { values[index] = FileInput::ReadString16(instr); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Uint8Vector& values) { GetDataT(col, startRow, count, values); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Int8Vector& values) { GetDataT(col, startRow, count, values); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Uint16Vector& values) { GetDataT(col, startRow, count, values); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Int16Vector& values) { GetDataT(col, startRow, count, values); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Uint32Vector& values) { GetDataT(col, startRow, count, values); 
} void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Int32Vector& values) { GetDataT(col, startRow, count, values); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, FloatVector& values) { GetDataT(col, startRow, count, values); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, std::vector& values) { GetDataT(col, startRow, count, values); } void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, WStringVector& values) { GetDataT(col, startRow, count, values); } template int32_t DataSet::GetDataRawT(int32_t col, int32_t startRow, int32_t count, T* values) { int32_t endRow = ComputeEndRow(startRow, count); if (header.GetColumnCnt() > 1) { for (int32_t row = startRow; row < endRow; ++row) { char* instr = FilePosition(row, col); AssignValue(row-startRow, values, instr); } } else // optimize { char* instr = FilePosition(startRow, col, count); int32_t recomputePositionRow = LastRowMapped(); for (int32_t row = startRow; row < endRow; ++row) { if (row > recomputePositionRow) { instr = FilePosition(row, col, count-row); recomputePositionRow = LastRowMapped(); } AssignValue(row-startRow, values, instr); } } return endRow-startRow; } void DataSet::AssignValue(int32_t index, u_int8_t* values, char*& instr) { values[index] = FileInput::ReadUInt8(instr); } void DataSet::AssignValue(int32_t index, int8_t* values, char*& instr) { values[index] = FileInput::ReadInt8(instr); } void DataSet::AssignValue(int32_t index, u_int16_t* values, char*& instr) { values[index] = FileInput::ReadUInt16(instr); } void DataSet::AssignValue(int32_t index, int16_t* values, char*& instr) { values[index] = FileInput::ReadInt16(instr); } void DataSet::AssignValue(int32_t index, u_int32_t* values, char*& instr) { values[index] = FileInput::ReadUInt32(instr); } void DataSet::AssignValue(int32_t index, int32_t* values, char*& instr) { values[index] = FileInput::ReadInt32(instr); } void DataSet::AssignValue(int32_t index, float* values, 
char*& instr) { values[index] = FileInput::ReadFloat(instr); } void DataSet::AssignValue(int32_t index, std::string* values, char*& instr) { values[index] = FileInput::ReadString8(instr); } void DataSet::AssignValue(int32_t index, std::wstring* values, char*& instr) { values[index] = FileInput::ReadString16(instr); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int8_t* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, int8_t* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int16_t* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, int16_t* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int32_t* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, int32_t* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, float* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::string* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::wstring* values) { return GetDataRawT(col, startRow, count, values); } int32_t DataSet::LastRowMapped() { return (mapLen+(mapStart-header.GetDataStartFilePos()))/BytesPerRow() - 1; } affxparser/src/fusion/calvin_files/data/src/DataSet.h0000644000175200017520000003420314516003651023666 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataSet_HEADER_ #define _DataSet_HEADER_ #ifdef _MSC_VER #include #endif #include "calvin_files/data/src/DataException.h" #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffyStlCollectionTypes.h" // #include #include #include // namespace affymetrix_calvin_io { // forward declare class GenericData; /*! This class provides methods to access the data of a DataSet. */ class DataSet { public: /*! Constructor. Use this constructor do access the data using memory-mapping. * On Windows, memory-mapping will be restricted to 200MB view of the DataSet data. * @param fileName The name of the generic file to access. * @param header The DataSetHeader of the DataSet to access. * @param handle A handle to the file mapping object * @param loadEntireDataSetHint Indicate if DataSet will attempt to read the entire DataSet data into a memory buffer. */ DataSet(const std::string& fileName, const affymetrix_calvin_io::DataSetHeader& header, void* handle, bool loadEntireDataSetHint=false); /*! Constructor. Use this constructor do access the data using std::ifstream. * With fstream access the entire DataSet data will be read into memory. 
* @param fileName The name of the generic file to access. * @param header The DataSetHeader of the DataSet to access. * @param ifs A reference to an open ifstream. * @param loadEntireDataSetHint Indicate if DataSet will attempt to read the entire DataSet data into a memory buffer. */ DataSet(const std::string& fileName, const affymetrix_calvin_io::DataSetHeader& header, std::ifstream& ifs, bool loadEntireDataSetHint=false); public: /*! Method to release memory held by this object. Closes object before deleting. */ void Delete(); /*! Method to open the DataSet to access the data. * @return true if successful */ bool Open(); /*! Method to close the DataSet. */ void Close(); /*! Method to get a reference to the DataSetHeader * @return A reference to the DataSetHeader. */ const affymetrix_calvin_io::DataSetHeader& Header() { return header; } /*! Return the number of rows in the DataSet. */ int32_t Rows() { return header.GetRowCnt(); } /*! Return the number of columns in the DataSet. */ int32_t Cols() { return header.GetColumnCnt(); } /*! Determines if the DataSet is open * @return true if the DataSet is open */ bool IsOpen() { return (isOpen); } /*! Provides access to single data elements * @param row Row index. * @param col Column index. * @param value Reference to the data type to fill with the data. * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped. */ void GetData(int32_t row, int32_t col, u_int8_t& value); void GetData(int32_t row, int32_t col, int8_t& value); void GetData(int32_t row, int32_t col, u_int16_t& value); void GetData(int32_t row, int32_t col, int16_t& value); void GetData(int32_t row, int32_t col, u_int32_t& value); void GetData(int32_t row, int32_t col, int32_t& value); void GetData(int32_t row, int32_t col, float& value); void GetData(int32_t row, int32_t col, std::string& value); void GetData(int32_t row, int32_t col, std::wstring& value); /*! 
Provides access to multiple data elements in the same column. * If count elements could not be read, it is not considered an error. The vector * is filled with only the data that could be read. * @param col Column index. * @param startRow Row index of the data to be inserted into the vector at [0]. * @param count Number of elements to retrieve. -1 indicates to read all * @param values Reference to the data type to fill with the data. * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped. */ void GetData(int32_t col, int32_t startRow, int32_t count, Uint8Vector& values); void GetData(int32_t col, int32_t startRow, int32_t count, Int8Vector& values); void GetData(int32_t col, int32_t startRow, int32_t count, Uint16Vector& values); void GetData(int32_t col, int32_t startRow, int32_t count, Int16Vector& values); void GetData(int32_t col, int32_t startRow, int32_t count, Uint32Vector& values); void GetData(int32_t col, int32_t startRow, int32_t count, Int32Vector& values); void GetData(int32_t col, int32_t startRow, int32_t count, FloatVector& values); void GetData(int32_t col, int32_t startRow, int32_t count, std::vector& values); void GetData(int32_t col, int32_t startRow, int32_t count, WStringVector& values); /*! Provides access to multiple data elements in the same column. * The caller is responsible for allocating the storage to which count element values can be written. * If count elements could not be read, it is not considered an error. The array * is filled with only the data that could be read. * @param col Column index. * @param startRow Row index of the data to be inserted into the vector at [0]. * @param count Number of elements to retrieve. -1 indicates to read all * @param values Reference to the data type to fill with the data. * @return Number of elements read. * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped. 
*/ int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int8_t* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int8_t* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int16_t* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int16_t* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int32_t* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int32_t* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, float* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::string* values); int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::wstring* values); /*! Check that the requested data matches the type of data in the column and that row and column are in bounds. * @param row Row index to check. * @param col Column index to check. * @param type Column type to check. * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped. * @exception affymetrix_calvin_exceptions::ColumnIndexOutOfBoundsException The column index is out-of-bounds. * @exception affymetrix_calvin_exceptions::RowIndexOutOfBoundsException The row index is out-of-bounds. * @exception affymetrix_calvin_exceptions::UnexpectedColumnTypeException The column type does not match the type requested. */ void CheckRowColumnAndType(int32_t row, int32_t col, affymetrix_calvin_io::DataSetColumnTypes type); //protected: /*! Return the bytes per row. * @return Bytes in a row. */ int32_t BytesPerRow() { return columnByteOffsets[header.GetColumnCnt()]; } //protected: /*! Destructor. */ ~DataSet(); protected: /*! Open the DataSet using memory-mapping * @return True if the DataSet was successully mapped. */ bool OpenMM(); /*! Read the DataSet data into a buffer using ifstream::read. */ void ReadDataSetUsingStream(); /*! Close the memory mapped file. */ void UnmapFile(); /*! 
Delete the DataSet data read in by ifstream::read */ void ClearStreamData(); /*! Returns the address of a data element given a row and column. Ensures that data from rowStart * to rowCount+rowStart are mapped unless that is larger than the mapped window. * @param rowStart Row index * @param col Column index * @param rowCount The number of rows to ensure are mapped starting at rowStart * @return Pointer to the data element at rowStart * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not open. * @exception affymetrix_calvin_exceptions::DataSetRemapException There was an error during a remap. */ char* FilePosition(int32_t rowStart, int32_t col, int32_t rowCount=1); /*! Returns the address of a data element given a row and column. Ensures that data from rowStart * to rowCount+rowStart are copied from the file into a memory buffer. The memory buffer will * remain intact until the next call to LoadDataAndReturnFilePosition. * @param rowStart Row index * @param col Column index * @param rowCount The number of rows to ensure are mapped starting at rowStart * @return Pointer to the data element at rowStart * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not open. */ char* LoadDataAndReturnFilePosition(int32_t rowStart, int32_t col, int32_t rowCount); /*! Updates the columnByteOffsets member. */ void UpdateColumnByteOffsets(); /*! Computes the index of the row after last to read. * @param startRow Row index of the data to be inserted into the vector at [0]. * @param count Number of elements to be retrieved. -1 indicates read to the last element. * @return Index of row after the last row to read. */ int32_t ComputeEndRow(int32_t startRow, int32_t count); /*! Clears and resizes the vector * @param values Reference to a vector to clear and resize. * @param size Target size of the vector */ template void ClearAndSizeVector(std::vector& values, u_int32_t size); /*! 
Template method to get data into a vector * @param col Column index. * @param startRow Row index of the data to be inserted into the vector at [0]. * @param count Number of elements to retrieve. -1 indicates to read all * @param values Reference to the data type to fill with the data. * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped. */ template void GetDataT(int32_t col, int32_t startRow, int32_t count, T& values); /*! Template method to get data into an array * @param col Column index. * @param startRow Row index of the data to be inserted into the vector at [0]. * @param count Number of elements to retrieve. -1 indicates to read all * @param values Reference to the data type to fill with the data. * @return Number of elements read. * @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped. */ template int32_t GetDataRawT(int32_t col, int32_t startRow, int32_t count, T* values); /*! Returns the index of the last row mapped. * @return Index of the last row mapped. */ int32_t LastRowMapped(); /*! Platform specific memory-mapping method */ #ifdef _MSC_VER bool MapDataWin32(u_int32_t start, u_int32_t bytes); #else bool MapDataPosix(u_int32_t start, u_int32_t bytes); #endif /*! Reads from the instr pointer into the vector at the index indicated. * @param index Index to the vector where to write the value. * @param values The vector into which to write the value. * @param instr A pointer to the data in the memory buffer. The pointer is advanced by the method. 
*/ void AssignValue(int32_t index, Uint8Vector& values, char*& instr); void AssignValue(int32_t index, Int8Vector& values, char*& instr); void AssignValue(int32_t index, Uint16Vector& values, char*& instr); void AssignValue(int32_t index, Int16Vector& values, char*& instr); void AssignValue(int32_t index, Uint32Vector& values, char*& instr); void AssignValue(int32_t index, Int32Vector& values, char*& instr); void AssignValue(int32_t index, FloatVector& values, char*& instr); void AssignValue(int32_t index, StringVector& values, char*& instr); void AssignValue(int32_t index, WStringVector& values, char*& instr); void AssignValue(int32_t index, u_int8_t* values, char*& instr); void AssignValue(int32_t index, int8_t* values, char*& instr); void AssignValue(int32_t index, u_int16_t* values, char*& instr); void AssignValue(int32_t index, int16_t* values, char*& instr); void AssignValue(int32_t index, u_int32_t* values, char*& instr); void AssignValue(int32_t index, int32_t* values, char*& instr); void AssignValue(int32_t index, float* values, char*& instr); void AssignValue(int32_t index, std::string* values, char*& instr); void AssignValue(int32_t index, std::wstring* values, char*& instr); protected: /*! name of the file containing the data data set*. */ std::string fileName; /*! copy of the DataSetHeader */ affymetrix_calvin_io::DataSetHeader header; /*! pointer to the mapped data, doesn't account for allocation granularity. */ void* mappedData; /*! pointer to the data. In memory-mapping mode, the pointer has been adjusted for the allocation granularity. */ char* data; /*! Array of column byte offsets. Updated when the file is opened. * There are columns + 1 elements */ Int32Vector columnByteOffsets; #ifdef _MSC_VER /*! Handle returned by CreateFileMapping */ HANDLE fileMapHandle; /*! Maximum size of the view to map */ static const u_int32_t MaxViewSize = 200*1024*1024; // 200MB #else FILE *fp; #endif /*! Indicates if the DataSet is open*/ bool isOpen; /*! 
Byte offset to the start of the view */ u_int32_t mapStart; /*! Number of bytes mapped to the view */ u_int32_t mapLen; /*! A flag the indicates the data access mode. True = access the data using memory-mapping. False = access the data using std::ifstream */ bool useMemoryMapping; /*! An open ifstream object */ std::ifstream* fileStream; /*! Indicates whether to attempt to read all data into a memory buffer. */ bool loadEntireDataSetHint; }; } #endif // _DataSet_HEADER_ affxparser/src/fusion/calvin_files/data/src/DataSetHeader.cpp0000644000175200017520000001045014516003651025330 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/DataSetHeader.h" // #include "calvin_files/data/src/FileHeader.h" // #include // using namespace affymetrix_calvin_io; DataSetHeader::DataSetHeader() { Clear(); } DataSetHeader::~DataSetHeader() { Clear(); } void DataSetHeader::Clear() { rowCount = 0; name.clear(); ClearNameValueParameters(); columnTypes.clear(); headerStartFilePos = 0; dataStartFilePos = 0; nextSetFilePos = 0; } void DataSetHeader::ClearNameValueParameters() { nameValParams.clear(); } int32_t DataSetHeader::GetDataSize() const { return GetRowSize() * rowCount; } int32_t DataSetHeader::GetRowSize() const { u_int32_t result = 0; int32_t sz = GetColumnCnt(); for(int i = 0; i < sz; i++) { result += GetColumnInfo(i).GetSize(); } return result; } void DataSetHeader::SetName(const std::wstring &p) { name = p; } std::wstring DataSetHeader::GetName() const { return name; } int32_t DataSetHeader::GetNameValParamCnt() const { return (int32_t)nameValParams.size(); } void DataSetHeader::AddNameValParam(const ParameterNameValueType &p) { nameValParams.push_back(p); } void DataSetHeader::GetNameValIterators(ParameterNameValueTypeConstIt &begin, ParameterNameValueTypeConstIt &end) const { begin = nameValParams.begin(); end = nameValParams.end(); } void DataSetHeader::GetNameValIterators(ParameterNameValueTypeIt &begin, ParameterNameValueTypeIt &end) { begin = nameValParams.begin(); end = nameValParams.end(); } void DataSetHeader::AddColumn(const ColumnInfo& colInfo) { columnTypes.push_back(colInfo); } void DataSetHeader::AddIntColumn(const std::wstring& name) { columnTypes.push_back(IntColumn(name)); } void DataSetHeader::AddUIntColumn(const std::wstring& name) { 
columnTypes.push_back(UIntColumn(name)); } void DataSetHeader::AddShortColumn(const std::wstring& name) { columnTypes.push_back(ShortColumn(name)); } void DataSetHeader::AddUShortColumn(const std::wstring& name) { columnTypes.push_back(UShortColumn(name)); } void DataSetHeader::AddByteColumn(const std::wstring& name) { columnTypes.push_back(ByteColumn(name)); } void DataSetHeader::AddUByteColumn(const std::wstring& name) { columnTypes.push_back(UByteColumn(name)); } void DataSetHeader::AddFloatColumn(const std::wstring& name) { columnTypes.push_back(FloatColumn(name)); } void DataSetHeader::AddAsciiColumn(const std::wstring& name, int32_t len) { columnTypes.push_back(ASCIIColumn(name, len)); } void DataSetHeader::AddUnicodeColumn(const std::wstring& name, int32_t len) { columnTypes.push_back(UnicodeColumn(name, len)); } ColumnInfo DataSetHeader::GetColumnInfo(int32_t index) const { return columnTypes[index]; } int32_t DataSetHeader::GetRowCnt() const { return rowCount; } void DataSetHeader::SetRowCnt(int32_t p) { rowCount = p; } int32_t DataSetHeader::GetColumnCnt() const { return (int32_t)columnTypes.size(); } bool DataSetHeader::FindNameValParam(const std::wstring& name, ParameterNameValueType& result) const { ParameterNameValueType t; t.SetName(name); t.SetValueText(L""); ParameterNameValueTypeConstIt found = FindNameValParam(t); if (found != nameValParams.end()) { result = *found; return true; } return false; } ParameterNameValueTypeConstIt DataSetHeader::FindNameValParam(const ParameterNameValueType& p) const { ParameterNameValueTypeConstIt begin, end; begin = nameValParams.begin(); end = nameValParams.end(); ParameterNameValueTypeConstIt ii = std::find(begin, end, p); if (ii != end) { return ii; } else return end; } affxparser/src/fusion/calvin_files/data/src/DataSetHeader.h0000644000175200017520000001243114516003651024776 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _DataSetHeader_HEADER_
#define _DataSetHeader_HEADER_

/*! \file DataSetHeader.h This file defines the data container for DataSetHeader information.
 */

#include "calvin_files/data/src/ColumnInfo.h"
#include "calvin_files/data/src/GenericDataHeader.h"
#include "calvin_files/parameter/src/ParameterNameValueType.h"
#include "calvin_files/utils/src/AffyStlCollectionTypes.h"
//
// NOTE(review): the header names of the next three #include directives are
// missing in this copy of the file - restore them from the upstream source.
#include
#include
#include
//

#ifdef _MSC_VER
#pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations.
#endif

using namespace affymetrix_calvin_parameter;

namespace affymetrix_calvin_io
{

/*! The DataSetHeader information container class.
 *  Holds the name, row count, column descriptions, name/value parameters and
 *  cached file positions of one data set.
 */
class DataSetHeader
{
public:
  /*! Constructs an empty header (calls Clear()). */
  DataSetHeader();
  /*! Destructor (calls Clear()). */
  ~DataSetHeader();

private:
  /*! total rows in the data set */
  int32_t rowCount;
  /*! data set name */
  std::wstring name;
  /*! name/value pairs */
  ParameterNameValueTypeVector nameValParams;
  /*! column information */
  ColInfoVector columnTypes;
  /*! file position of the start of the dataSet header */
  u_int32_t headerStartFilePos;
  /*! file position of the start of the data */
  u_int32_t dataStartFilePos;
  /*! file position of the next dataSet header */
  u_int32_t nextSetFilePos;

public:
  /*! Resets the row count, name, name/value parameters, columns and the
   *  three file positions.
   */
  void Clear();
  /*! Removes all name/value parameters. */
  void ClearNameValueParameters();
  /*! Returns GetRowSize() multiplied by the row count. */
  int32_t GetDataSize() const;
  /*! Returns the sum of GetSize() over all columns. */
  int32_t GetRowSize() const;
  /*! Sets the data set name. */
  void SetName(const std::wstring &p);
  /*! Returns a copy of the data set name. */
  std::wstring GetName() const;
  /*! Returns the number of name/value parameters. */
  int32_t GetNameValParamCnt() const;
  /*! Appends a name/value parameter. */
  void AddNameValParam(const ParameterNameValueType &p);
  /*! Sets begin and end to the range of the name/value parameters
   *  (read-only and modifiable variants).
   */
  void GetNameValIterators(ParameterNameValueTypeConstIt &begin, ParameterNameValueTypeConstIt &end) const;
  void GetNameValIterators(ParameterNameValueTypeIt &begin, ParameterNameValueTypeIt &end);
  /*! Finds a ParameterNameValueType by name in the nameValParams collection
   *  @param name The name of the NameValPair to find
   *  @param result Reference to a ParameterNameValueType to fill with the found ParameterNameValueType.
   *  @return true if the ParameterNameValueType was found
   */
  bool FindNameValParam(const std::wstring& name, ParameterNameValueType& result) const;
  /*! Appends an already constructed column description. */
  void AddColumn(const ColumnInfo& colInfo);
  /*! Appends an IntColumn with the given name. */
  void AddIntColumn(const std::wstring& name);
  /*! Appends a UIntColumn with the given name. */
  void AddUIntColumn(const std::wstring& name);
  /*! Appends a ShortColumn with the given name. */
  void AddShortColumn(const std::wstring& name);
  /*! Appends a UShortColumn with the given name. */
  void AddUShortColumn(const std::wstring& name);
  /*! Appends a ByteColumn with the given name. */
  void AddByteColumn(const std::wstring& name);
  /*! Appends a UByteColumn with the given name. */
  void AddUByteColumn(const std::wstring& name);
  /*! Appends a FloatColumn with the given name. */
  void AddFloatColumn(const std::wstring& name);
  /*! Appends an ASCIIColumn.
   *  @param name The name of the column.
   *  @param len Maximum number of char in string
   */
  void AddAsciiColumn(const std::wstring& name, int32_t len);
  /*! Appends a UnicodeColumn.
   *  @param name The name of the column.
   *  @param len Maximum number of wchar_t in string
   */
  void AddUnicodeColumn(const std::wstring& name, int32_t len);
  /*! Returns a copy of the column description at index.
   *  The index is not range-checked by the implementation.
   */
  ColumnInfo GetColumnInfo(int32_t index) const;
  /*! Returns the number of rows. */
  int32_t GetRowCnt() const;
  /*! Sets the number of rows. */
  void SetRowCnt(int32_t p);
  /*! Returns the number of columns. */
  int32_t GetColumnCnt() const;
  /*! Set the file position of the start of the DataSet header.
   *  The value set here is not written to the file.
   */
  void SetHeaderStartFilePos(u_int32_t pos) { headerStartFilePos = pos; }
  /*! Get the file position of the start of the DataSet header.
   */
  u_int32_t GetHeaderStartFilePos() const { return headerStartFilePos; }
  /*! Set the file position of the start of the DataSet data.
   *  The value set here is not written to the file.
   */
  void SetDataStartFilePos(u_int32_t pos) { dataStartFilePos = pos; }
  /*! Get the file position of the start of the DataSet data.
   */
  u_int32_t GetDataStartFilePos() const { return dataStartFilePos; }
  /*! Set the file position of the next DataSet header.
   */
  void SetNextSetFilePos(u_int32_t pos) { nextSetFilePos = pos; }
  /*! Get the file position of the next DataSet header.
   */
  u_int32_t GetNextSetFilePos() const { return nextSetFilePos; }

protected:
  /*! Finds a ParameterNameValueType in the nameValParams collection
   *  @param p The ParameterNameValueType to find
   *  @return An iterator referencing the NameValPair if it exists, otherwise it returns nameValParams.end()
   */
  ParameterNameValueTypeConstIt FindNameValParam(const ParameterNameValueType& p) const;

private:
};

// NOTE(review): the template arguments of the two typedefs below are missing
// in this copy of the file (presumably DataSetHeader) - restore them from the
// upstream source.

/*! vector of DataSetHeaders */
typedef std::vector DataSetHdrVector;
/*! iterator of DataSetHeaders (declared via ::iterator, i.e. not a const iterator) */
typedef std::vector::iterator DataSetHdrIt;

}

#endif // _DataSetHeader_HEADER_
affxparser/src/fusion/calvin_files/data/src/FamilialMultiDataData.h0000644000175200017520000000540014516003651026453 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2008 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
// for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FamilialMultiDataData_HEADER_ #define _FamilialMultiDataData_HEADER_ /*! \file FamilialMultiDataData.h This file provides types to store results for a familial file. */ // #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include #include // namespace affymetrix_calvin_data { /*! Stores the segment overlap from a familial file. */ typedef struct _FamilialSegmentOverlap { /*! The type of segment; the name of the data set in which the segment appears in the CYCHP file. */ std::string segmentType; /*! The key identifying the sample from the Samples data set. */ u_int32_t referenceSampleKey; /*! The ID of the segment of the reference sample. */ std::string referenceSegmentID; /*! The key identifying the sample from the Samples data set. */ u_int32_t familialSampleKey; /*! The ID of the segment of the compare sample. */ std::string familialSegmentID; } FamilialSegmentOverlap; /*! Stores information about the sample for a familial file. */ typedef struct _FamilialSample { /*! Local arbitrary unique sample identifier used within the file. */ u_int32_t sampleKey; /*! The identifier of the ARR file associated with the sample. If no ARR file was used in generating the associated CYCHP files, this value will be the empty string. */ std::string arrID; /*! The identifier of the CYCHP file containing the sample data. */ std::string chpID; /*! The filename (not the complete path) of the CYCHP file containing the sample data. */ std::wstring chpFilename; /*! The role of the identified sample, such as “index”, “mother”, or “father”. */ std::string role; /*! 
The call of whether the assigned role is correct. */ bool roleValidity; /*! The confidence that the assigned role is correct */ float roleConfidence; } FamilialSample; } #endif affxparser/src/fusion/calvin_files/data/src/FileHeader.cpp0000644000175200017520000000462014516003651024664 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/FileHeader.h" // using namespace affymetrix_calvin_io; FileHeader::FileHeader() { magic = MAGIC_NUM; version = CALVINIOVERSION; } void FileHeader::Clear() { dataGroupHdrs.clear(); genericHdr.Clear(); numDataGroups = 0; firstDataGroupFilePos = 0; } void FileHeader::SetFilename(const std::string &p) { filename = p; } std::string FileHeader::GetFilename() const { return filename; } u_int8_t FileHeader::GetMagicNumber() const { return magic; } u_int8_t FileHeader::GetVersion() const { return version; } void FileHeader::AddDataGroupHdr(const DataGroupHeader &p) { dataGroupHdrs.push_back(p); } DataGroupHeader& FileHeader::GetDataGroup(int32_t index) { return dataGroupHdrs[index]; } const DataGroupHeader& FileHeader::GetDataGroupConst(int32_t index) const { return dataGroupHdrs[index]; } void 
FileHeader::GetDataGroupIts(DataGroupHdrIt &begin, DataGroupHdrIt &end) { begin = dataGroupHdrs.begin(); end = dataGroupHdrs.end(); } void FileHeader::SetGenericDataHdr(const GenericDataHeader &p) { genericHdr = p; } GenericDataHeader* FileHeader::GetGenericDataHdr() { return &genericHdr; } int32_t FileHeader::GetDataGroupCnt() const { return (int32_t)dataGroupHdrs.size(); } /* * Finds a DataGroupHeader by name */ DataGroupHeader* FileHeader::FindDataGroupHeader(const std::wstring& name) { DataGroupHeader* dch = 0; DataGroupHdrIt begin; DataGroupHdrIt end; GetDataGroupIts(begin,end); for (DataGroupHdrIt ii=begin; ii!=end; ++ii) { if (ii->GetName() == name) { dch = &(*ii); break; } } return dch; } affxparser/src/fusion/calvin_files/data/src/FileHeader.h0000644000175200017520000000662314516003651024336 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FileHeader_HEADER_ #define _FileHeader_HEADER_ /*! 
\file FileHeader.h This file defines a class to act as a data container for the generic file header
 */

#include "calvin_files/data/src/DataGroupHeader.h"
#include "calvin_files/data/src/GenericDataHeader.h"
#include "calvin_files/portability/src/AffymetrixBaseTypes.h"
#include "calvin_files/utils/src/AffyStlCollectionTypes.h"
//
// NOTE(review): the header names of the next three #include directives are
// missing in this copy of the file - restore them from the upstream source.
#include
#include
#include
//

#ifdef _MSC_VER
#pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations.
#endif

namespace affymetrix_calvin_io
{

/*! Magic number assigned to every FileHeader on construction. */
static const u_int8_t MAGIC_NUM = 59;
/*! File format version assigned to every FileHeader on construction. */
static const u_int8_t CALVINIOVERSION = 1;

/*! This class defines a data container for the generic file header */
class FileHeader
{
public:

  /*! Constructor; sets the magic number and version. */
  FileHeader();
  /*! Destructor; clears the contents. */
  ~FileHeader() { Clear(); }

private:

  /*! filename */
  std::string filename;
  /*! magic number (MAGIC_NUM) */
  u_int8_t magic;
  /*! file format version (CALVINIOVERSION) */
  u_int8_t version;
  /*! headers of the DataGroups added so far */
  DataGroupHdrVector dataGroupHdrs;
  /*! generic data header of the file */
  GenericDataHeader genericHdr;
  /*! Number of DataGroups in the file */
  int32_t numDataGroups;
  /*! Position of the first DataGroup. */
  u_int32_t firstDataGroupFilePos;

public:

  /*! Removes all DataGroup headers, clears the generic data header and
   *  resets the DataGroup count and first DataGroup position to 0.
   */
  void Clear();
  /*! Sets the file name. */
  void SetFilename(const std::string &p);
  /*! Returns a copy of the file name. */
  std::string GetFilename() const;
  /*! Returns the magic number. */
  u_int8_t GetMagicNumber() const;
  /*! Returns the file format version. */
  u_int8_t GetVersion() const;
  /*! Get the number of DataGroupHeaders added */
  int32_t GetDataGroupCnt() const;
  /*! Appends a copy of a DataGroupHeader. */
  void AddDataGroupHdr(const DataGroupHeader &p);
  /*! Get a DataGroupHeader by index.  Max index < GetDataGroupCnt.
   *  The index is not range-checked by the implementation.
   */
  DataGroupHeader& GetDataGroup(int32_t index);
  /*! Read-only variant of GetDataGroup. */
  const DataGroupHeader& GetDataGroupConst(int32_t index) const;
  /*! Sets begin and end to the range of the stored DataGroupHeaders. */
  void GetDataGroupIts(DataGroupHdrIt &begin, DataGroupHdrIt &end);

  /*! Replaces the generic data header with a copy of p. */
  void SetGenericDataHdr(const GenericDataHeader &p);
  /*! Returns a pointer to the generic data header owned by this object. */
  GenericDataHeader* GetGenericDataHdr();
  /*! Finds a DataGroupHeader by name.
   *  @param name The name of the DataGroup
   *  @return A pointer to the DataGroupHeader.  If not found, the return is 0.
   */
  DataGroupHeader* FindDataGroupHeader(const std::wstring& name);
  /*! Get the number of DataGroups in a file.
   *  This is the stored value set via SetNumDataGroups, which is kept
   *  separately from the number of headers added (GetDataGroupCnt).
   */
  int32_t GetNumDataGroups() const { return numDataGroups; }
  /*! Set the number of DataGroups.  Set when reading a file */
  void SetNumDataGroups(int32_t value) { numDataGroups = value; }
  /*! Get the file position to the first DataGroup */
  u_int32_t GetFirstDataGroupFilePos() { return firstDataGroupFilePos; }
  /*! Set the file position to the first DataGroup. Method should be protected. It is set when the object is being read. */
  void SetFirstDataGroupFilePos(u_int32_t value) { firstDataGroupFilePos = value; }
};

}

#endif // _FileHeader_HEADER_
affxparser/src/fusion/calvin_files/data/src/GenericData.cpp0000644000175200017520000003217014516003651025043 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#include "calvin_files/data/src/GenericData.h"
//
#include "calvin_files/array/src/ArrayId.h"
#include "calvin_files/parsers/src/DataGroupHeaderReader.h"
#include "calvin_files/parsers/src/DataSetHeaderReader.h"
//
#include "util/Fs.h"
//
// NOTE(review): the header names of the next four #include directives are
// missing in this copy of the file - restore them from the upstream source.
#include
#include
#include
#include
//

using namespace affymetrix_calvin_io;

/*
 * Initialize the class.
 * Memory-mapped access is the default and the load-entire-data-set hint is
 * off; on MSVC builds the file and file-mapping handles start out invalid.
 */
GenericData::GenericData()
{
#ifdef _MSC_VER
  fileMapHandle = NULL;
  fileHandle = INVALID_HANDLE_VALUE;
#endif
  useMemoryMapping = true;
  loadEntireDataSetHint = false;
}

/*
 * Clean up.
*/ GenericData::~GenericData() { Clear(); } /* * Get the file identifier */ affymetrix_calvin_utilities::AffymetrixGuidType GenericData::FileIdentifier() { return header.GetGenericDataHdr()->GetFileId(); } /* * Get the parent array file identifier */ affymetrix_calvin_utilities::AffymetrixGuidType GenericData::ArrayFileIdentifier() { // Find the parent array file generic header affymetrix_calvin_utilities::AffymetrixGuidType arrayFileID; //GenDataHdrVectorIt begin; //GenDataHdrVectorIt end; GenericDataHeader* hdr = header.GetGenericDataHdr()->FindParent(ARRAY_TYPE_IDENTIFIER); if (hdr) { arrayFileID = hdr->GetFileId(); } return arrayFileID; } /* * Returns the parent array identifier. */ affymetrix_calvin_utilities::AffymetrixGuidType GenericData::ArrayIdentifier() { // Find the parent array file generic header affymetrix_calvin_utilities::AffymetrixGuidType arrayID; //GenDataHdrVectorIt begin; //GenDataHdrVectorIt end; GenericDataHeader* hdr = header.GetGenericDataHdr()->FindParent(ARRAY_TYPE_IDENTIFIER); if (hdr) { ParameterNameValueType nvt; if (hdr->FindNameValParam(ARRAY_ID_PARAM_NAME, nvt)) { arrayID = nvt.GetValueAscii(); } } return arrayID; } /* * Get the number of DataGroups */ u_int32_t GenericData::DataGroupCnt() const { return header.GetDataGroupCnt(); } /* * Get the names of the DataGroup */ void GenericData::DataGroupNames(std::vector& names) { names.clear(); DataGroupHdrIt begin; DataGroupHdrIt end; header.GetDataGroupIts(begin,end); for (DataGroupHdrIt ii=begin; ii!=end; ++ii) { names.push_back(ii->GetName()); } } /* * Get the number of DataSets given a DataGroup index */ u_int32_t GenericData::DataSetCnt(u_int32_t dataGroupIdx) { DataGroupHeader* dch = FindDataGroupHeader(dataGroupIdx); if (dch) return dch->GetDataSetCnt(); else { affymetrix_calvin_exceptions::DataGroupNotFoundException e(L"Calvin",L"Default Description, Please 
Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Get the number of DataSets given a DataGroup name */ u_int32_t GenericData::DataSetCnt(const std::wstring& dataGroupName) { DataGroupHeader* dch = FindDataGroupHeader(dataGroupName); if (dch) return dch->GetDataSetCnt(); else { affymetrix_calvin_exceptions::DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Get the DataSet names given a DataGroup index */ void GenericData::DataSetNames(u_int32_t dataGroupIdx, std::vector& names) { DataGroupHeader* dch = FindDataGroupHeader(dataGroupIdx); if (dch == 0) { affymetrix_calvin_exceptions::DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } names.clear(); DataSetHdrIt begin; DataSetHdrIt end; dch->GetDataSetIterators(begin,end); for (DataSetHdrIt ii=begin; ii!=end; ++ii) { names.push_back(ii->GetName()); } } /* * Get the DataSet names given a DataGroup name */ void GenericData::DataSetNames(const std::wstring& dataGroupName, std::vector& names) { DataGroupHeader* dch = FindDataGroupHeader(dataGroupName); if (dch == 0) { affymetrix_calvin_exceptions::DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } names.clear(); DataSetHdrIt begin; DataSetHdrIt end; dch->GetDataSetIterators(begin,end); for (DataSetHdrIt ii=begin; ii!=end; ++ii) { names.push_back(ii->GetName()); } } /* * Get the DataSet given a DataGroup and DataSet index */ DataSet* GenericData::DataSet(u_int32_t dataGroupIdx, u_int32_t dataSetIdx) { if (Open() == 
false) { affymetrix_calvin_exceptions::FileNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } DataGroupHeader* dch = FindDataGroupHeader(dataGroupIdx); if (dch) { DataSetHeader* dph = FindDataSetHeader(dch, dataSetIdx); if (dph) { return CreateDataSet(dph); } else { affymetrix_calvin_exceptions::DataSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } else { affymetrix_calvin_exceptions::DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Get the DataSet given a DataGroup and DataSet name */ DataSet* GenericData::DataSet(const std::wstring& dataGroupName, const std::wstring& dataSetName) { if (Open() == false) { affymetrix_calvin_exceptions::FileNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } DataGroupHeader* dch = FindDataGroupHeader(dataGroupName); if (dch) { DataSetHeader* dph = FindDataSetHeader(dch, dataSetName); if (dph) { return CreateDataSet(dph); } else { affymetrix_calvin_exceptions::DataSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } else { affymetrix_calvin_exceptions::DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Creates a new DataSet based on 
the dsh argument */ DataSet* GenericData::CreateDataSet(DataSetHeader* dsh) { void* handle = 0; #ifdef _MSC_VER handle = fileMapHandle; #endif ReadFullDataSetHeader(dsh); if (useMemoryMapping) { return new affymetrix_calvin_io::DataSet(Header().GetFilename(), *dsh, handle, loadEntireDataSetHint); } else { return new affymetrix_calvin_io::DataSet(Header().GetFilename(), *dsh, fileStream, loadEntireDataSetHint); } } /* * Returns a DataGroup object based on a DataGroup file position */ affymetrix_calvin_io::DataGroup GenericData::DataGroup(u_int32_t dataGroupFilePos) { if (Open() == false) { affymetrix_calvin_exceptions::FileNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } // A little indirection std::ifstream fs; std::ifstream* pfs = &fileStream; // initialize to use GenericData::fileStream if (useMemoryMapping) { // Open a file stream OpenFStream(fs); pfs = &fs; } // and position it pfs->seekg(dataGroupFilePos, std::ios_base::beg); // Read the DataGroupHeader and all DataSetHeaders DataGroupHeader dch; DataGroupHeaderReader reader; reader.Read(*pfs, dch); if (useMemoryMapping) fs.close(); void* handle = 0; #ifdef _MSC_VER handle = fileMapHandle; #endif if (useMemoryMapping) { return affymetrix_calvin_io::DataGroup(Header().GetFilename(), dch, handle, loadEntireDataSetHint); } else { return affymetrix_calvin_io::DataGroup(Header().GetFilename(), dch, fileStream, loadEntireDataSetHint); } } /* * Erase the members data. 
*/ void GenericData::Clear() { // Clear the header header.Clear(); Close(); useMemoryMapping = true; } /* * Finds a DataGroupHeader by name */ DataGroupHeader* GenericData::FindDataGroupHeader(const std::wstring& name) { return header.FindDataGroupHeader(name); } /* * Finds a DataGroupHeader by index */ DataGroupHeader* GenericData::FindDataGroupHeader(int32_t index) { DataGroupHeader* dch = 0; if (index >= 0 && index < header.GetDataGroupCnt()) { dch = &header.GetDataGroup(index); } return dch; } /* * Find a DataSetHeader given the DataGroupHeader and DataSet name */ DataSetHeader* GenericData::FindDataSetHeader(DataGroupHeader* dch, const std::wstring& dataSetName) { DataSetHeader* dph = 0; if (dch != 0) { dph = dch->FindDataSetHeader(dataSetName); } return dph; } /* * Find a DataSetHeader given the DataGroupHeader and DataSet index */ DataSetHeader* GenericData::FindDataSetHeader(DataGroupHeader* dch, u_int32_t dataSetIdx) { DataSetHeader* dph = 0; if (dch != 0) { // WAS if (dataSetIdx >= 0 && ...) BUT 'dataSetIdx >= 0' // is always true because 'dataSetIdx' is unsigned. if ((int32_t)dataSetIdx < dch->GetDataSetCnt()) { dph = &dch->GetDataSet(dataSetIdx); } } return dph; } /* * Read the full DataSetHeader if it has only been parially read. */ void GenericData::ReadFullDataSetHeader(DataSetHeader* dph) { // Check if the DataSet has been read fully. if (IsDSHPartiallyRead(dph)) { // Open a file stream std::ifstream fs; std::ifstream* pfs = &fileStream; // initialize to use GenericData::ifs if (useMemoryMapping) { OpenFStream(fs); pfs = &fs; } // and position it pfs->seekg(dph->GetHeaderStartFilePos(), std::ios_base::beg); // Read the header DataSetHeaderReader reader; reader.Read(*pfs, *dph); if (useMemoryMapping) fs.close(); } } /* * Determine if the DataSetHeader has been partially read. 
*/ bool GenericData::IsDSHPartiallyRead(const DataSetHeader* dph) { if (dph == 0) return false; if (dph->GetRowCnt() == 0 && dph->GetColumnCnt() == 0 && dph->GetNameValParamCnt() == 0) return true; return false; } /* * Open the ifstream */ void GenericData::OpenFStream(std::ifstream& ifs) { Fs::aptOpen(ifs, Header().GetFilename(), std::ios::in | std::ios::binary); if (!ifs.is_open() && !ifs.good()) { affymetrix_calvin_exceptions::FileNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } bool GenericData::Open() { if (useMemoryMapping) return MapFile(); else { if (fileStream.is_open() == false) OpenFStream(fileStream); return true; } } void GenericData::Close() { UnmapFile(); if (fileStream.is_open()) fileStream.close(); } /* * Open a memory map on the file. */ bool GenericData::MapFile() { #ifdef _MSC_VER // On Windows the map is open in the GenericData object, otherwise it is opened in the DataSet if (fileHandle == INVALID_HANDLE_VALUE) { // Create the file. std::wstring filename = Fs::convertToUncPathW(Header().GetFilename(), 10); fileHandle = CreateFileW(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (fileHandle == INVALID_HANDLE_VALUE) { Verbose::out(1, Fs::MSC_VER_GetLastErrorString(std::string("GenericData::MapFile ") + Util::toString(filename) + ": ")); return false; } } if (fileMapHandle == NULL) { // Use the current size of the file. 
DWORD dwSizeHigh = 0; DWORD dwSizeLow = 0; fileMapHandle = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, dwSizeHigh, dwSizeLow, NULL); if (fileMapHandle == NULL) return false; } #endif return true; } /* * Close the memory-mapping on the file */ void GenericData::UnmapFile() { #ifdef _MSC_VER if (fileHandle != INVALID_HANDLE_VALUE) { if (fileMapHandle != NULL) { CloseHandle(fileMapHandle); fileMapHandle = NULL; } CloseHandle (fileHandle); fileHandle = INVALID_HANDLE_VALUE; } #endif } affxparser/src/fusion/calvin_files/data/src/GenericData.h0000644000175200017520000002314514516003651024512 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericData_HEADER_ #define _GenericData_HEADER_ /*! \file GenericData.h This file provides access to generic data. 
*/ #ifdef _MSC_VER #include #endif #include "calvin_files/data/src/DataException.h" #include "calvin_files/data/src/DataGroup.h" #include "calvin_files/data/src/DataSet.h" #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffyStlCollectionTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // using namespace affymetrix_calvin_utilities; namespace affymetrix_calvin_io { /*! This class provides interfaces to store analysis results and data. */ class GenericData { public: /*! Constructor */ GenericData(); /*! Destructor */ ~GenericData(); /*! Returns the file identifier. * @return The file identifier. */ AffymetrixGuidType FileIdentifier(); /*! Returns the parent array file identifier. * @return The parent array file identifier. */ AffymetrixGuidType ArrayFileIdentifier(); /*! Returns the parent array identifier. * @return The parent array identifier. */ AffymetrixGuidType ArrayIdentifier(); /*! Returns a reference to the file header object * * @return File header object */ FileHeader& Header() { return header; } /*! Return the number of DataGroups in the GenericData object. * @return Number of DataGroups. */ u_int32_t DataGroupCnt() const; /*! Return the names of the DataGroups in the generic data object. * @param names An [in,out] vector that will receive the names of all DataGroups. */ void DataGroupNames(WStringVector& names); /*! Return the number of DataSets in the DataGroup referenced by index. * @param dataGroupIdx Index of the DataGroup. * @return Number of DataSets associated with the DataGroup. * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. */ u_int32_t DataSetCnt(u_int32_t dataGroupIdx); /*! Return the number of DataSets in the DataGroup referenced by name. * @param dataGroupName Name of the DataGroup. 
* @return Number of DataSets associated with the DataGroup. * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. */ u_int32_t DataSetCnt(const std::wstring& dataGroupName); /*! Return the DataSet names associated with a DataGroup. * @param dataGroupIdx Index of the DataGroup from which to retrieve the DataSet names. * @param names An [in,out] vector that will receive the names of all DataSets. * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. */ void DataSetNames(u_int32_t dataGroupIdx, WStringVector& names); /*! Return the DataSet names associated with a DataGroup. * @param dataGroupName Name of the DataGroup from which to retrieve the DataSet names. * @param names An [in,out] vector that will receive the names of all DataSets. * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. */ void DataSetNames(const std::wstring& dataGroupName, WStringVector& names); /*! Returns a pointer to the DataSet object by DataGroup and DataSet index. * Each call will return a new DataSet object. * The caller should call Delete when finished with the DataSet. * * @param dataGroupIdx The index of the DataGroup from which to find the DataSet. * @param dataSetIdx The index of the DataSet to return. * @return DataSet * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. * @exception affymetrix_calvin_exceptions::DataSetNotFoundException DataSet not found. */ affymetrix_calvin_io::DataSet* DataSet(u_int32_t dataGroupIdx, u_int32_t dataSetIdx); /*! Returns a pointer to the DataSet object by DataGroup and DataSet name. * Each call will return a new DataSet object. * The caller should call Delete when finished with the DataSet. * * @param dataGroupName The name of the DataGroup from which to find the DataSet. * @param dataSetName The name of the DataSet to return. 
* @return DataSet * @exception affymetrix_calvin_exceptions::DataGroupNotFoundException DataGroup not found. * @exception affymetrix_calvin_exceptions::DataSetNotFoundException DataSet not found. */ affymetrix_calvin_io::DataSet* DataSet(const std::wstring& dataGroupName, const std::wstring& dataSetName); /*! Returns a DataGroup object based on a DataGroup file position. * This is useful when there are many DataGroups and the file position of each DataGroup is known (Calvin CDF). * In this case the GenericFileReader::ReadHeader() method should be called with the ReadNoDataGroupHeader flag. * @param dataGroupFilePos File position of the DataGroup in the current file * @return DataGroup object. */ affymetrix_calvin_io::DataGroup DataGroup(u_int32_t dataGroupFilePos); /*! Clears the contents of the class.*/ void Clear(); /*! Sets the file access method when reading data. Set this value before getting the first DataSet or DataGroup. * The default is to use memory-mapping. * @param value If value is true the file will be accessed with memory-mapping, if false, STL fstream will be used */ void UseMemoryMapping(bool value) { useMemoryMapping = value; } /*! Set the data loading policy for DataSets and DataGroups created by GenericData. * This is considered a hint to DataSet. Default value is false. * @param value If value is true, DataSets created subsequently will attempt to read the entire DataSet data into a memory buffer. */ void LoadEntireDataSetHint(bool value) { loadEntireDataSetHint = value; } // Protected members public: /*! Read the full DataSetHeader if it has only been parially read. * @param dph Pointer to the DataSetHeader to read */ void ReadFullDataSetHeader(DataSetHeader* dph); /*! Determine if the DataSetHeader has been partially read. * @param dph Pointer to the DataSetHeader to check * @return true if the dph has only been partially read or is 0, otherwise false. */ bool IsDSHPartiallyRead(const affymetrix_calvin_io::DataSetHeader* dph); /*! 
Opens a ifstream object on the file set on the Header() method * @param ifs The stream to open on the file. */ void OpenFStream(std::ifstream& ifs); /*! Finds a DataSetHeader by name. * @param name The name of the DataGroup * @return A pointer to the DataGroupHeader. If not found, the return is 0. */ affymetrix_calvin_io::DataGroupHeader* FindDataGroupHeader(const std::wstring& name); /*! Finds a DataGroupHeader by index. * @param index The index of the DataGroup. * @return A pointer to the DataGroupHeader. If not found, the return is 0. */ affymetrix_calvin_io::DataGroupHeader* FindDataGroupHeader(int32_t index); /*! Finds a DataSetHeader by index. * @param dch The DataGroupHeader of the DataGroup to which the DataSet belongs. * @param dataSetIdx The DataSet index of the DataSetHeader to find. * @return A pointer to the DataSetHeader if it is found, otherwise 0. */ static affymetrix_calvin_io::DataSetHeader* FindDataSetHeader(affymetrix_calvin_io::DataGroupHeader* dch, u_int32_t dataSetIdx); /*! Finds a DataSetHeader by name. * @param dch The DataGroupHeader of the DataGroup to which the DataSet belongs. * @param dataSetName The DataSet name of the DataSetHeader to find. * @return A pointer to the DataSetHeader if it is found, otherwise 0. */ static affymetrix_calvin_io::DataSetHeader* FindDataSetHeader(affymetrix_calvin_io::DataGroupHeader* dch, const std::wstring& dataSetName); /*! Opens the file for access. Has no effect on non-Windows systems. * @return True if the memory-mapping was opened successfully. */ bool Open(); /*! Closes the file. */ void Close(); /*! Opens a memory map on the file. Has no effect on non-Windows systems. * @return True if the memory-mapping was opened successfully. */ bool MapFile(); /*! Closes the memory map. Windows only */ void UnmapFile(); /*! Creates a new DataSet * @param dsh The DataSetHeader of the DataSet to create. * @return The new DataSet */ affymetrix_calvin_io::DataSet* CreateDataSet(DataSetHeader* dsh); protected: /*! 
The header and generic header objects */ affymetrix_calvin_io::FileHeader header; #ifdef _MSC_VER /*! Handle returned by CreateFileMapping */ HANDLE fileMapHandle; /*! Handle returned by CreateFile */ HANDLE fileHandle; #endif /*! Flag that indicates the file access technique; true = use memory-mapping, false = use ifstream */ bool useMemoryMapping; /*! fstream file access member */ std::ifstream fileStream; /*! Indicates whether DataSets and DataGroups created by GenericData should attempt to read all data into a memory buffer. */ bool loadEntireDataSetHint; friend class DataGroup; }; } #endif // _GenericData_HEADER_ affxparser/src/fusion/calvin_files/data/src/GenericDataHeader.cpp0000644000175200017520000001300414516003651026147 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/GenericDataHeader.h" // #include "calvin_files/parameter/src/AffymetrixParameterConsts.h" // #include // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; GenericDataHeader::GenericDataHeader() { locale = US_ENGLISH_LOCALE; } void GenericDataHeader::Clear() { fileTypeId.clear(); fileId.clear(); fileCreationTime.clear(); nameValParams.clear(); GenericDataHdrs.clear(); } void GenericDataHeader::SetFileTypeId(const std::string &p) { fileTypeId = p; } std::string GenericDataHeader::GetFileTypeId() const { return fileTypeId; } void GenericDataHeader::SetFileId(const affymetrix_calvin_utilities::AffymetrixGuidType &p) { fileId = p; } affymetrix_calvin_utilities::AffymetrixGuidType GenericDataHeader::GetFileId() const { return fileId; } void GenericDataHeader::SetFileCreationTime(const std::wstring &p) { fileCreationTime = p; } std::wstring GenericDataHeader::GetFileCreationTime() const { return fileCreationTime; } void GenericDataHeader::SetLocale(const std::wstring &p) { locale = p; } std::wstring GenericDataHeader::GetLocale() const { return locale; } void GenericDataHeader::AddNameValParam(const ParameterNameValueType &entry) { AddNameValParam(entry, true); } /** This override of the original method of the same name adds the parameter bool doUniqueAdds. This parameter is a hint flag to determine whether to bypass the FindParameter call within. FindParameter is used to ensure duplicates do not get entered into the collection. In most cases, the calling application is not updating or adding parameter to the collection, but is just reading the header parameters. 
Through testing it has been found that bypassing the FindParameter call made, will reduce reads of the header greatly. If an application is updating or adding parameters to the collection and require no duplicates to be added, then do not use this override method, but if you are just reading header parameters, then this will save time reading headers. */ void GenericDataHeader::AddNameValParam(const ParameterNameValueType &entry, bool doUniqueAdds) { if(doUniqueAdds == true) { ParameterNameValueTypeIt ii = FindNameValParam(entry); if((ii == nameValParams.end()) == false) { *ii = entry; } else { paramNameIdxMap[entry.GetName()] = nameValParams.size(); nameValParams.push_back(entry); } } else { paramNameIdxMap[entry.GetName()] = nameValParams.size(); nameValParams.push_back(entry); } } ParameterNameValueType GenericDataHeader::GetNameValParam(int32_t index) { return nameValParams[index]; } int32_t GenericDataHeader::GetNameValParamCnt() const { return (int32_t)nameValParams.size(); } void GenericDataHeader::GetNameValIterators(ParameterNameValueTypeIt &begin, ParameterNameValueTypeIt &end) { begin = nameValParams.begin(); end = nameValParams.end(); } void GenericDataHeader::AddParent(const GenericDataHeader &hdr) { GenericDataHdrs.push_back(hdr); } void GenericDataHeader::GetParentIterators(GenDataHdrVectorIt &begin, GenDataHdrVectorIt &end) { begin = GenericDataHdrs.begin(); end = GenericDataHdrs.end(); } int32_t GenericDataHeader::GetParentCnt() const { return (int32_t)GenericDataHdrs.size(); } GenericDataHeader GenericDataHeader::GetParent(int32_t index) const { return GenericDataHdrs[index]; } /* * Find an immediate parent GenericDataHeader based on file type id. Does not search grand-parents or above. 
*/ GenericDataHeader* GenericDataHeader::FindParent(const std::string& fileTypeId) { GenericDataHeader* parentGDH = 0; GenDataHdrVectorIt begin, end; GetParentIterators(begin, end); for (GenDataHdrVectorIt ii = begin; ii != end; ++ii) { if (ii->GetFileTypeId() == fileTypeId) { parentGDH = &(*ii); break; } } return parentGDH; } bool GenericDataHeader::FindNameValParam(const std::wstring& name, ParameterNameValueType& result) { ParameterNameValueType t; t.SetName(name); t.SetValueText(L""); ParameterNameValueTypeIt found = FindNameValParam(t); if (found != nameValParams.end()) { result = *found; return true; } return false; } ParameterNameValueTypeIt GenericDataHeader::FindNameValParam(const ParameterNameValueType& p) { std::map::const_iterator idx = paramNameIdxMap.find(p.GetName()); if (idx == paramNameIdxMap.end()) { return nameValParams.end(); } return nameValParams.begin() + idx->second; } bool GenericDataHeader::GetNameValParamsBeginsWith(const std::wstring& beginsWith, ParameterNameValueTypeVector& p) { p.clear(); for (ParameterNameValueTypeIt ii = nameValParams.begin(); ii != nameValParams.end(); ++ii) { if (ii->GetName().find(beginsWith) == 0) { p.push_back(*ii); } } return (p.size() > 0); } affxparser/src/fusion/calvin_files/data/src/GenericDataHeader.h0000644000175200017520000001163514516003651025624 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericDataHeader_HEADER_ #define _GenericDataHeader_HEADER_ #include "calvin_files/parameter/src/AffymetrixParameterConsts.h" #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif using namespace affymetrix_calvin_parameter; namespace affymetrix_calvin_io { class GenericDataHeader { public: GenericDataHeader(); ~GenericDataHeader() { Clear(); } private: /*! */ std::string fileTypeId; /*! */ affymetrix_calvin_utilities::AffymetrixGuidType fileId; /*! */ std::wstring fileCreationTime; /*! */ std::wstring locale; /*! */ ParameterNameValueTypeVector nameValParams; /*! */ std::vector GenericDataHdrs; /*! */ std::map paramNameIdxMap; public: /*! */ void Clear(); /*! */ void SetFileTypeId(const std::string &p); /*! */ std::string GetFileTypeId() const; /*! */ void SetFileId(const affymetrix_calvin_utilities::AffymetrixGuidType &p); /*! */ affymetrix_calvin_utilities::AffymetrixGuidType GetFileId() const; /*! */ void SetFileCreationTime(const std::wstring &f); /*! */ std::wstring GetFileCreationTime() const; /*! */ void SetLocale(const std::wstring &p); /*! */ std::wstring GetLocale() const; /*! * Calls AddNameValParam with the doUniqueAddds set to true. */ void AddNameValParam(const ParameterNameValueType &p); /*! * Adds a ParameterNameValueType to the collection. * @param Parameter to add. 
* @param Hint to either perform a check to ensure unique parameters are in the collection (N^2 performance), else do not * perfomr the check. */ void AddNameValParam(const ParameterNameValueType &p, bool doUniqueAdds); /*! */ ParameterNameValueType GetNameValParam(int32_t index); /*! */ int32_t GetNameValParamCnt() const; /*! */ void GetNameValIterators(ParameterNameValueTypeIt &begin, ParameterNameValueTypeIt &end); /*! */ int32_t GetParentCnt() const; /*! */ void AddParent(const GenericDataHeader &hdr); /*! */ GenericDataHeader GetParent(int32_t index) const; /*! */ void GetParentIterators(std::vector::iterator &begin, std::vector::iterator &end); /*! Find an immediate parent GenericDataHeader based on file type id. Does not search grand-parents or above. * @param fileTypeId The fileTypeId of the parent header to find. * @return Returns a pointer to the parent GenericDataHeader if found, otherwise returns 0. */ GenericDataHeader* FindParent(const std::string& fileTypeId); /*! Finds a ParameterNameValueType by name in the nameValPairs collection * @param name The name of the NameValPair to find * @param result Reference to a ParameterNameValueType to fill with the found ParameterNameValueType. * @return true if the ParameterNameValueType was found */ bool FindNameValParam(const std::wstring& name, ParameterNameValueType& result); /*! Gets all ParameterNameValueType where the name starts with a given string. * @param beginsWith The string that the beginning of the ParameterNameValueType name needs to match. * @param p A result vector of ParameterNameValueTypes where the name begins with the beginsWith argument. */ bool GetNameValParamsBeginsWith(const std::wstring& beginsWith, ParameterNameValueTypeVector& p); protected: /*! 
Finds a ParameterNameValueType by name in the nameValPairs collection * @param p The ParameterNameValueType to find * @return An iterator referencing the NameValPair if it exists, otherwise it returns nameValPairs.end() */ ParameterNameValueTypeIt FindNameValParam(const ParameterNameValueType& p); }; /*! vector of GenericDataHeaders */ typedef std::vector GenDataHdrVector; /*! iterator of GenericDataHeaders */ typedef std::vector::iterator GenDataHdrVectorIt; }; #endif // _GenericDataHeader_HEADER_ affxparser/src/fusion/calvin_files/data/src/GenericDataTypes.h0000644000175200017520000000323214516003651025532 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericDataTypes_HEADER_ #define _GenericDataTypes_HEADER_ /*! \file GenericDataTypes.h Defines constants for each specific data file type. */ #include "calvin_files/utils/src/AffymetrixGuid.h" // namespace affymetrix_calvin_data { /*! Defines an identifier for the scan acquisition data file. */ #define SCAN_ACQUISITION_DATA_TYPE affymetrix_calvin_utilities::AffymetrixGuidType("affymetrix-calvin-scan-acquisition") /*! Defines an identifier for the scan acquisition data file. 
*/ #define MULTI_SCAN_ACQUISITION_DATA_TYPE affymetrix_calvin_utilities::AffymetrixGuidType("affymetrix-calvin-multi-scan-acquisition") /*! Defines an identifier for the intensity data file. */ #define INTENSITY_DATA_TYPE affymetrix_calvin_utilities::AffymetrixGuidType("affymetrix-calvin-intensity") } #endif // _GenericDataTypes_HEADER_ affxparser/src/fusion/calvin_files/data/src/MarkerABSignals.h0000644000175200017520000000257314516003651025313 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _Marker_AB_Signals_HEADER_ #define _Marker_AB_Signals_HEADER_ /*! \file MarkerABSignals.h This file provides types to hold marker AB peak results. */ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // namespace affymetrix_calvin_data { /*! Holds the marker AB signal results */ typedef struct _MarkerABSignals { /*! The index to the probe set ids in the main data set. 
*/ u_int32_t index; /* additional metric */ std::vector metrics; } MarkerABSignals; } #endif affxparser/src/fusion/calvin_files/data/src/MemMapFile.cpp0000644000175200017520000001315214516003651024650 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/data/src/MemMapFile.h" // using namespace affymetrix_calvin_io; /* * Gets the error message for the last error in a Window API call. */ std::wstring MemMapFile::GetErrorMsg() { LPVOID lpMsgBuf; if (!FormatMessageW( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, ::GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language (LPWSTR) &lpMsgBuf, 0, NULL )) { // Handle the error. return L""; } std::wstring message = (wchar_t*)lpMsgBuf; // Free the buffer. LocalFree( lpMsgBuf ); return message; } /* * Constructor */ MemMapFile::MemMapFile() { fileSize = 0; dwMapMode = 0; mappedData = 0; data = 0; startMapPos = 0; bytesMapped = 0; fileMapHandle = NULL; fileHandle = INVALID_HANDLE_VALUE; flushMMView = false; offsetToStart = 0; } /* * Creates a file. 
*/ bool MemMapFile::Create(__int64 size) { Close(); if (fileName.length() == 0) return false; // create a memory-mapped file dwMapMode = FILE_MAP_READ; if (fileHandle == INVALID_HANDLE_VALUE) { fileHandle = CreateFile(fileName.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); dwMapMode = FILE_MAP_WRITE; } if (fileHandle != INVALID_HANDLE_VALUE) { fileSize = size; DWORD dwHigh = DWORD(size >> 32); DWORD dwLow = DWORD(size & 0xFFFFFFFF); DWORD dwProtect = (dwMapMode == FILE_MAP_READ) ? PAGE_READONLY : PAGE_READWRITE; fileMapHandle = CreateFileMapping(fileHandle, NULL, dwProtect, dwHigh, dwLow, NULL); if (fileMapHandle == NULL) { errorMsg = GetErrorMsg(); CloseHandle (fileHandle); fileHandle = INVALID_HANDLE_VALUE; return false; } } else { errorMsg = GetErrorMsg(); return false; } return true; } /* * Opens a file using memory-mapping. */ bool MemMapFile::Open(RWAccess rwAccess, ShareMode shareMode) { Close (); if (fileName.length() == 0) return false; // Open a file handle DWORD dwAccess = GENERIC_READ; dwMapMode = FILE_MAP_READ; if (rwAccess & WRITE) { dwAccess |= GENERIC_WRITE; dwMapMode = FILE_MAP_WRITE; } DWORD dwShare = 0; if (shareMode & ALLOWREAD) dwShare |= FILE_SHARE_READ; if (shareMode & ALLOWWRITE) dwShare |= FILE_SHARE_WRITE; DWORD dwCreate = (rwAccess & WRITE) ? ((rwAccess & READ) ? OPEN_ALWAYS : CREATE_ALWAYS) : OPEN_EXISTING; fileHandle = CreateFile(fileName.c_str(), dwAccess, dwShare, NULL, dwCreate, FILE_ATTRIBUTE_NORMAL, NULL); if (fileHandle == INVALID_HANDLE_VALUE) return false; DWORD dwSizeHigh = 0; DWORD dwSizeLow = GetFileSize(fileHandle, &dwSizeHigh); fileSize = (__int64(dwSizeHigh) << 32) + __int64(dwSizeLow); // Create a file map handle DWORD dwProtect = (dwMapMode == FILE_MAP_READ) ? 
PAGE_READONLY : PAGE_READWRITE; fileMapHandle = CreateFileMapping(fileHandle, NULL, dwProtect, dwSizeHigh, dwSizeLow, NULL); if (fileMapHandle == NULL) { errorMsg = GetErrorMsg(); CloseHandle(fileHandle); fileHandle = INVALID_HANDLE_VALUE; return false; } return true; } /* * Maps a view of the file. */ bool MemMapFile::MapData(__int64 start, DWORD bytes) { if (fileHandle == INVALID_HANDLE_VALUE) return false; if (fileMapHandle == NULL) return false; SYSTEM_INFO sysinfo; GetSystemInfo (&sysinfo); __int64 qwFileOffset = start; DWORD dwOffset = DWORD(qwFileOffset % sysinfo.dwAllocationGranularity); qwFileOffset = (qwFileOffset / sysinfo.dwAllocationGranularity) * sysinfo.dwAllocationGranularity; DWORD dwOffsetHigh = DWORD(qwFileOffset >> 32); DWORD dwOffsetLow = DWORD(qwFileOffset & 0xFFFFFFFF); if (start + __int64(bytes) > fileSize) bytes = (DWORD)(fileSize - start); DWORD dwBytesToMap = bytes + dwOffset; if (mappedData != 0) { if (flushMMView) FlushViewOfFile(mappedData, offsetToStart+bytesMapped); UnmapViewOfFile (mappedData); } mappedData = MapViewOfFile(fileMapHandle, dwMapMode, dwOffsetHigh, dwOffsetLow, dwBytesToMap); if (mappedData == NULL) { data = 0; bytesMapped = 0; CloseHandle (fileMapHandle); fileMapHandle = NULL; return false; } bytesMapped = bytes; offsetToStart = dwOffset; data = (char *)mappedData + dwOffset; startMapPos = (u_int32_t)start; // need to rethink for large files. return true; } /* * Closes the mapped view and file access. 
*/ void MemMapFile::Close() { UnmapFile(); } /* * Unmaps a view */ void MemMapFile::UnmapFile() { if (fileHandle != INVALID_HANDLE_VALUE) { if (fileMapHandle != NULL) { if (mappedData != 0 ) { if (flushMMView) FlushViewOfFile(mappedData, offsetToStart+bytesMapped); UnmapViewOfFile(mappedData); mappedData = 0; } CloseHandle(fileMapHandle); fileMapHandle = NULL; data = 0; } CloseHandle (fileHandle); fileHandle = INVALID_HANDLE_VALUE; } } affxparser/src/fusion/calvin_files/data/src/MemMapFile.h0000644000175200017520000001123714516003651024317 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _MemMapFile_HEADER_ #define _MemMapFile_HEADER_ /*! \file MemMapFile.h This file defines a class that encapsulates Windows memory-mapped * file access methods. */ #ifdef _MSC_VER #include #endif // #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include // namespace affymetrix_calvin_io { typedef enum RWAccess { READ = 1, WRITE, READ_AND_WRITE }; typedef enum ShareMode { EXCLUSIVE, ALLOWREAD, ALLOWWRITE, ALLOWREAD_AND_WRITE }; /*! This class encapsulates Window memory-mapped file access methods. */ class MemMapFile { public: /*! 
Constructor */ MemMapFile(); /*! Destructor */ ~MemMapFile() { Close(); } /*! Sets the name of the file to access. * @param value Name of the file. */ void SetFilename(std::string value) { fileName = value; } /*! Get the name of the file. * @return Name of the file. */ std::string GetFilename() const { return fileName; } /*! A flag that indicates if the memory-mapped view is to be flushed after unmapping. * @param value True indicates to flush a file after closing. */ void SetFlushMMView(bool value) { flushMMView = value; } /*! Creates a file. * @param size Size, in bytes, of the file to create. */ bool Create(__int64 size); /*! Opens a file using memory-mapping. * @param rwAccess Type of file access desried; read, write or both. * @param shareMode Share mode. * @return Returns true if successful. */ bool Open(RWAccess rwAccess, ShareMode shareMode); /*! Maps a view of the file. * The data data can be accessed by calling GetDataPtr * @param start Start file position of the mapped view. * @param bytes The number of bytes to map into the view. * @return Returns true if successful. */ bool MapData(__int64 start, DWORD bytes); /*! Closes the mapped view and file access. */ void Close(); /*! Get the pointer to the mapped view. * @return Returns the address of the mapped view of the file. */ char* GetDataPtr() { return data; } /*! Get the file position of the first byte mapped into the view. * @return Returns the file position of the first byte mapped into the view. */ u_int32_t GetFirstMappedBytePos() const { return startMapPos; } /*! Get the number of bytes mapped into the current view. * @return Returns the number of bytes mapped into the current view. */ u_int32_t GetBytesMapped() const { return bytesMapped; } /*! Get the error message of the most recent error. * @return Returns the error message from the most recent error. */ std::wstring GetLastError() { return errorMsg; } /*! Checks if a view is mapped. * @return Returns true if a view is mapped. 
*/ bool IsViewMapped() { return (mappedData != 0); } protected: /*! Unmaps a view */ void UnmapFile(); /*! Gets the error message for the last error in a Window API call. * @return Returns the last Windows API error message. */ std::wstring GetErrorMsg(); protected: /*! Name of the file. */ std::string fileName; /*! map mode */ DWORD dwMapMode; /*! Handle returned by CreateFileMapping */ HANDLE fileMapHandle; /*! Handle returned by CreateFile */ HANDLE fileHandle; /*! pointer to the mapped data, doesn't account for allocation granularity. */ void* mappedData; /*! pointer to the mapped data, allocation granularity has been accounted for. */ char* data; /*! The file position of the first byte mapped into the view */ u_int32_t startMapPos; /*! The number of bytes mapped into the view */ u_int32_t bytesMapped; /*! Size of the file */ __int64 fileSize; /*! Flag that indicates if the memory-mapped view is to be flushed after unmapping */ bool flushMMView; /*! The number extra bytes mapped at the start of a view because of allocation granularity */ u_int32_t offsetToStart; // used by FlushViewOfFile /*! Error message */ std::wstring errorMsg; }; } #endif // _MemMapFile_HEADER_ affxparser/src/fusion/calvin_files/data/src/ProbeSetMultiDataData.cpp0000644000175200017520000000545714516003651027027 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "calvin_files/data/src/ProbeSetMultiDataData.h" // #include #include // u_int8_t affymetrix_calvin_data::ChromosomeFromString(const std::string& chr) { int chrValue = atoi(chr.c_str()); if (chrValue == 0) { if (chr == "X" || chr == "x") chrValue = X_CHR; else if (chr == "Y" || chr == "y") chrValue = Y_CHR; else if (chr == "MT" || chr == "mt") chrValue = MT_CHR; else chrValue = NO_CHR; } return chrValue; } std::string affymetrix_calvin_data::ChromosomeToString(u_int8_t chr) { if (chr == X_CHR) return "X"; else if (chr == Y_CHR) return "Y"; else if (chr == MT_CHR) return "MT"; else if (chr == NO_CHR) return "-"; else { std::ostringstream str; str << (int)chr; return str.str(); } } u_int8_t affymetrix_calvin_data::CytoCallFromString(const std::string &call) { if (call == "A") return CYTO_ABSENT_CALL; else if (call == "P") return CYTO_PRESENT_CALL; else return CYTO_NO_CALL; } std::string affymetrix_calvin_data::CytoCallToString(u_int8_t call) { switch (call) { case CYTO_ABSENT_CALL: return "A"; break; case CYTO_PRESENT_CALL: return "P"; break; default: return "NC"; break; } } u_int8_t affymetrix_calvin_data::GenotypeCallFromString(const std::string &call) { if (call == "A" || call == "AA") return SNP_AA_CALL; else if (call == "B" || call == "BB") return SNP_BB_CALL; else if (call == "AB") return SNP_AB_CALL; else return SNP_NO_CALL; } std::string affymetrix_calvin_data::GenotypeCallToString(u_int8_t call) { switch (call) { case SNP_AA_CALL: return "A"; break; case SNP_AB_CALL: return "AB"; break; case SNP_BB_CALL: return "BB"; break; default: return "No Call"; break; } } 
affxparser/src/fusion/calvin_files/data/src/ProbeSetMultiDataData.h0000644000175200017520000001662314516003651026471 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ProbeSetMultiDataData_HEADER_ #define _ProbeSetMultiDataData_HEADER_ /*! \file ProbeSetMultiDataData.h This file provides types to hold MultiData results. */ // #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include #include // namespace affymetrix_calvin_data { /*! The AA call */ #define SNP_AA_CALL 6 /*! The BB call */ #define SNP_BB_CALL 7 /*! The AB call */ #define SNP_AB_CALL 8 /*! The no call allele call */ #define SNP_NO_CALL 11 /*! Stores data for a genotype result of a probe set. */ typedef struct _ProbeSetMultiDataGenotypeData { /*! The name of the probe set. */ std::string name; /*! The call. */ u_int8_t call; /*! The confidence of the call. */ float confidence; /*! Other metrics associated with the call. */ std::vector metrics; } ProbeSetMultiDataGenotypeData; /*! A value to represent the X chromosome. */ #define X_CHR 24 /*! A value to represent the Y chromosome. 
*/ #define Y_CHR 25 /*! A value to represent the MT chromosome. */ #define MT_CHR 26 /*! A value to represent no chromosome. */ #define NO_CHR 255 /*! Stores data for a copy number result of a probe set. */ typedef struct _ProbeSetMultiDataCopyNumberData { /*! The name of the probe set. */ std::string name; /*! The chromosome value. */ u_int8_t chr; /*! The physical position. */ u_int32_t position; /*! Other metrics associated with the result. */ std::vector metrics; } ProbeSetMultiDataCopyNumberData; /*! A no call for the cyto result. */ #define CYTO_NO_CALL 0 /*! An absent call for the cyto result. */ #define CYTO_ABSENT_CALL 1 /*! A present call for the cyto result. */ #define CYTO_PRESENT_CALL 2 /*! A loss call for the cyto result (shares the value 2 with CYTO_PRESENT_CALL). */ #define CYTO_LOSS_CALL 2 /*! A gain call for the cyto result. */ #define CYTO_GAIN_CALL 3 /*! Stores data for a cyto region result. */ typedef struct _ProbeSetMultiDataCytoRegionData { /*! The name of the region. */ std::string name; /*! The chromosome value. */ u_int8_t chr; /*! The physical start position. */ u_int32_t startPosition; /*! The physical stop position. */ u_int32_t stopPosition; /*! The call for the region. */ u_int8_t call; /*! The confidence score. */ float confidenceScore; /*! Other metrics associated with the result. */ std::vector metrics; } ProbeSetMultiDataCytoRegionData; /*! Stores data for an expression result of a probe set. */ typedef struct _ProbeSetMultiDataExpressionData { /*! The name of the probe set. */ std::string name; /*! The quantification of the probe set. */ float quantification; /*! Other metrics associated with the call. */ std::vector metrics; } ProbeSetMultiDataExpressionData; /*! Stores data for a copy number variation region result. */ typedef struct _ProbeSetMultiDataCopyNumberVariationRegionData { /*! The name of the region. */ std::string name; /*! The signal for the region */ float signal; /*! The call for the region. */ u_int8_t call; /*! The confidence score. */ float confidenceScore; /*! 
Other metrics associated with the result. */ std::vector metrics; } ProbeSetMultiDataCopyNumberVariationRegionData; /*! Stores data for a DMET copy number result of a probe set. */ typedef struct _DmetCopyNumberData { /*! The name of the probe set. */ std::string name; /*! The call for the probe set. */ int16_t call; /*! The confidence score. */ float confidence; /*! The copy number force. */ int16_t force; /*! The copy number estimate. */ float estimate; /*! The lower bound. */ float lower; /*! The upper bound. */ float upper; /*! Other metrics associated with the result. */ std::vector metrics; } DmetCopyNumberData; /*! Stores data for a DMET multi-allelic genotype probe set. */ typedef struct _DmetMultiAllelicData { /*! The name of the probe set. */ std::string name; /*! The call for the probe set. */ u_int8_t call; /*! The confidence score. */ float confidence; /*! The force value. NOTE(review): the original comment read "copy number force"; confirm the meaning against the DMET file format. */ u_int8_t force; /*! The allele count. */ u_int8_t alleleCount; /*! Signals A through F (presumably one per allele; confirm). */ float signalA; float signalB; float signalC; float signalD; float signalE; float signalF; /*! Contexts A through F (presumably one per allele; confirm). */ u_int8_t contextA; u_int8_t contextB; u_int8_t contextC; u_int8_t contextD; u_int8_t contextE; u_int8_t contextF; /*! Other metrics associated with the result. */ std::vector metrics; } DmetMultiAllelicData; /*! Stores data for a DMET bi-allelic genotype probe set. */ typedef struct _DmetBiAllelicData { /*! The name of the probe set. */ std::string name; /*! The call for the probe set. */ u_int8_t call; /*! The confidence score. */ float confidence; /*! The force value. NOTE(review): the original comment read "copy number force"; confirm the meaning against the DMET file format. */ u_int8_t force; /*! Signals A and B (presumably one per allele; confirm). */ float signalA; float signalB; /*! Contexts A and B (presumably one per allele; confirm). */ u_int8_t contextA; u_int8_t contextB; /*! Other metrics associated with the result. */ std::vector metrics; } DmetBiAllelicData; /*! Convert a string representation of a chromosome to a numeric representation. * @param chr The chromosome value. * @return A numeric representation of the chromosome value. */ u_int8_t ChromosomeFromString(const std::string& chr); /*! 
Convert a numeric representation of a chromosome to a string representation. * @param chr The chromosome value. * @return A string representation of the chromosome value. */ std::string ChromosomeToString(u_int8_t chr); /*! Convert a string representation of a cyto call to a numeric representation. * @param call The call value. * @return A numeric representation of the call value. */ u_int8_t CytoCallFromString(const std::string &call); /*! Convert a numeric representation of a call to a string representation. * @param call The call value. * @return A string representation of the call value. */ std::string CytoCallToString(u_int8_t call); /*! Convert a string representation of a genotype call to a numeric representation. * @param call The call value. * @return A numeric representation of the call value. */ u_int8_t GenotypeCallFromString(const std::string &call); /*! Convert a numeric representation of a genotype call to a string representation. * @param call The call value. * @return A string representation of the call value. */ std::string GenotypeCallToString(u_int8_t call); } #endif affxparser/src/fusion/calvin_files/data/src/ProbeSetQuantificationData.h0000644000175200017520000000274114516003651027557 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ProbeSetQuantificationData_HEADER_ #define _ProbeSetQuantificationData_HEADER_ /*! \file ProbeSetQuantificationData.h This file provides types to hold quantification results. */ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include // namespace affymetrix_calvin_data { /*! Stores data for a quantification of a probe set. */ typedef struct _ProbeSetQuantificationData { /*! The name of the probe set. */ std::string name; /*! The probe set id. */ int32_t id; /*! The quantification associated to the name. */ float quantification; } ProbeSetQuantificationData; } #endif affxparser/src/fusion/calvin_files/data/src/ProbeSetQuantificationDetectionData.h0000644000175200017520000000313314516003651031412 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ProbeSetQuantificationDetectionData_HEADER_ #define _ProbeSetQuantificationDetectionData_HEADER_ /*! \file ProbeSetQuantificationDetectionData.h This file provides types to hold quantification and detection results. */ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include // namespace affymetrix_calvin_data { /*! Stores data for a quantification/detection of a probe set. */ typedef struct _ProbeSetQuantificationDetectionData { /*! The name of the probe set. */ std::string name; /*! The probe set id. */ int32_t id; /*! The quantification associated to the name. */ float quantification; /*! The detection p-value. */ float pvalue; } ProbeSetQuantificationDetectionData; } #endif affxparser/src/fusion/calvin_files/data/src/TilingResultData.h0000644000175200017520000000277614516003651025572 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TilingResultData_HEADER_ #define _TilingResultData_HEADER_ /*! \file TilingResultData.h This file provides types to hold tiling results. */ #include "calvin_files/parameter/src/ParameterNameValueType.h" // #include #include // /*! Stores data for a tiling array sequence. */ typedef struct _TilingSequenceData { /*! The name of the sequence. */ std::wstring name; /*! The version associated to the sequence. */ std::wstring version; /*! The group name for the sequence. */ std::wstring groupName; /*! The parameter name/value array. */ affymetrix_calvin_parameter::ParameterNameValueTypeList parameters; } TilingSequenceData; #endif affxparser/src/fusion/calvin_files/exception/0000755000175200017520000000000014516003651022464 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/exception/src/0000755000175200017520000000000014516022540023251 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/exception/src/DevelopmentException.cpp0000644000175200017520000000212414516003651030117 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/exception/src/DevelopmentException.h" // namespace affymetrix_calvin_exceptions { const std::wstring NotImplementedException::ToString() { return L"affymetrix_calvin_exceptions::NotImplementedException thrown."; } } affxparser/src/fusion/calvin_files/exception/src/DevelopmentException.h0000644000175200017520000000302014516003651027560 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DevelopmentException_HEADER_ #define _DevelopmentException_HEADER_ #include "calvin_files/exception/src/ExceptionBase.h" // /*! \file DevelopmentException.h This file defines exceptions used during development. 
*/ namespace affymetrix_calvin_exceptions { class NotImplementedException : public CalvinException { public: NotImplementedException() : CalvinException() {} NotImplementedException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; } #endif affxparser/src/fusion/calvin_files/exception/src/ExceptionBase.cpp0000644000175200017520000001665214516003651026522 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file ExceptionBase.h This file provides class definition for the base exception types. */ #include "calvin_files/exception/src/ExceptionBase.h" // #include // using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; /*! Constructor * Default constructor. Initializes all variables to 0 if numeric, "" if string and the date to the current date and time. 
*/ CalvinException::CalvinException() { errorCode = 0; lineNumber = 0; fileName = ""; timeStamp = DateTime::GetCurrentDateTime().ToString(); errorDescription = L""; sourceName = L""; } /*! Constructor * Constructs a CalvinException object. * * @param _Source Typically will be the application name. Can also be the file sdk object type E.g. "Array File Reader". * @param _Description A brief message which describes what happened. * @param _TimeStamp Time and date the exception occured. * @param _FileName File name of the source file the exception occured. The __FILE__ can be used to determine this information. * @param _LineNumber Line number in the source file that generated the exception. The __LINE__ can be used to determine this information. * @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException::CalvinException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode) { Source(_Source); Description(_Description); LineNumber(_LineNumber); SourceFile(_FileName); TimeStamp(_TimeStamp); ErrorCode(_ErrorCode); } /*! Constructor * Constructs a CalvinException object * Note: The time date stamp will be automatically set to the current time. * * @param _Source Typically will be the application name. Can also be the file sdk object type E.g. "Array File Reader". * @param _Description A brief message which describes what happened. * @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException::CalvinException(std::wstring _Source, std::wstring _Description, u_int64_t _ErrorCode) { Source(_Source); Description(_Description); LineNumber(0); SourceFile(""); TimeStamp(DateTime::GetCurrentDateTime().ToString()); ErrorCode(_ErrorCode); } /*! Constructor * Constructs a CalvinException object * Note: The time date stamp will be automatically set to the current time. 
* * @param _Description A brief message which describes what happened. * @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException::CalvinException(std::wstring _Description, u_int64_t _ErrorCode) { Source(L""); Description(_Description); LineNumber(0); SourceFile(""); TimeStamp(DateTime::GetCurrentDateTime().ToString()); ErrorCode(_ErrorCode); } /*! Constructor * Constructs a CalvinException object * Note: The time date stamp will be automatically set to the current time. * * @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException::CalvinException(u_int64_t _ErrorCode) { Source(L""); Description(L""); LineNumber(0); SourceFile(""); TimeStamp(DateTime::GetCurrentDateTime().ToString()); ErrorCode(_ErrorCode); } /*! The source name associated with the exception. * * @return Source name associated with the exception. */ std::wstring CalvinException::Source() { return sourceName; } /*! The source name associated with the exception. * * @param value Source name associated with the exception. */ void CalvinException::Source(std::wstring value) { sourceName = value; } /*! The error description associated with the exception. * * @return The error description associated with the exception. */ std::wstring CalvinException::Description() { return errorDescription; } /*! The error description associated with the exception. * * @param value Error description associated with the exception. */ void CalvinException::Description(std::wstring value) { errorDescription = value; } /*! The error time stamp associated with the exception. * * @return Error time stamp associated with the exception. */ std::wstring CalvinException::TimeStamp() { return timeStamp; } /*! The error time stamp associated with the exception. * * @param value Error time stamp associated with the exception. */ void CalvinException::TimeStamp(std::wstring value) { timeStamp = value; } /*! 
The error source file name associated with the exception. * * @return The source file name associated with the exception. */ std::string CalvinException::SourceFile() { return fileName; } /*! The error source file name associated with the exception. * * @param value Source file name associated with the exception. */ void CalvinException::SourceFile(std::string value) { fileName = value; } /*! The error source line number associated with the exception. * * @return The source line number associated with the exception. */ u_int16_t CalvinException::LineNumber() { return lineNumber; } /*! The error source line number associated with the exception. * * @param value Source line number associated with the exception. */ void CalvinException::LineNumber(u_int16_t value) { lineNumber = value; } /*! The error code associated with the exception. * * @return The error code associated with the exception. */ u_int64_t CalvinException::ErrorCode() { return errorCode; } /*! The error code associated with the exception. * * @param value Error code associated with the exception. */ void CalvinException::ErrorCode(u_int64_t value) { errorCode = value; } /*! The error code associated with the exception. * * @return Error code associated with the exception. */ const std::wstring CalvinException::ToString() { std::wstring fullMsg; fullMsg = L"Not implemented yet."; return fullMsg; } /*! Format source file, line and time stamp. * * @return Returns a string combining source file, line number and time stamp. 
*/ std::wstring CalvinException::SystemInfo() { std::wstring systemInfo; // TIME STAMP if ( !timeStamp.empty() ) { systemInfo = timeStamp; } // FILE NAME if ( !fileName.empty() ) { std::wstring wfileName( fileName.length(), L' ' ); std::copy(fileName.begin(), fileName.end(), wfileName.begin()); if( systemInfo.empty() ) { systemInfo = wfileName; } else { systemInfo = systemInfo + L":" + wfileName; } } // LINE NUMBER if ( lineNumber ) { std::wostringstream line; line << lineNumber; if( systemInfo.empty() ) { systemInfo = line.str(); } else { systemInfo = systemInfo + L":" + line.str(); } } if ( !systemInfo.empty() ) { systemInfo = systemInfo + L":"; } return systemInfo; } affxparser/src/fusion/calvin_files/exception/src/ExceptionBase.h0000644000175200017520000001453714516003651026167 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ExceptionBase_HEADER_ #define _ExceptionBase_HEADER_ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/DateTime.h" // #include #include // /*! \file ExceptionBase.h This file provides base class definition for the exception class. */ namespace affymetrix_calvin_exceptions { /*! 
Base exception class for the Calvin File SDK */ class CalvinException { protected: /*! Source in the _MSC_VER use case is used for registry lookup to determine the resource file. The message code is used incojuction with source to lookup the specific message string based on code. */ std::wstring sourceName; /*! Message for the exception */ std::wstring errorDescription; /*! Date/Time stamp of exception */ std::wstring timeStamp; /*! File the exception occured in */ std::string fileName; /*! Line number that the exception occured */ u_int16_t lineNumber; /*! Message code to be used by client for logic or to use for lookup in localized resource file */ u_int64_t errorCode; public: /*! Constructor * Default constructor. Initializes all variables to 0 if numeric, "" if string and the date to the current date and time. */ CalvinException(); /*! Constructor * Constructs a CalvinException object. * * @param _Source Typically will be the application name. Can also be the file sdk object type E.g. "Array File Reader". * @param _Description A brief message which describes what happened. * @param _TimeStamp Time and date the exception occured. * @param _FileName File name of the source file the exception occured. The __FILE__ can be used to determine this information. * @param _LineNumber Line number in the source file that generated the exception. The __LINE__ can be used to determine this information. * @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode); /*! Constructor * Constructs a CalvinException object * Note: The time date stamp will be automatically set to the current time. * * @param _Source Typically will be the application name. Can also be the file sdk object type E.g. "Array File Reader". * @param _Description A brief message which describes what happened. 
* @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException(std::wstring _Source, std::wstring _Description, u_int64_t _ErrorCode); /*! Constructor * Constructs a CalvinException object * Note: The time date stamp will be automatically set to the current time. * * @param _Description A brief message which describes what happened. * @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException(std::wstring _Description, u_int64_t _ErrorCode); /*! Constructor * Constructs a CalvinException object * Note: The time date stamp will be automatically set to the current time. * * @param _ErrorCode An numeric value the is unique to this error/exception type */ CalvinException(u_int64_t _ErrorCode); /*! The source name associated with the exception. * * @return The source name associated with the exception. */ std::wstring Source(); /*! The source name associated with the exception. * * @param value Source name associated with the exception. */ void Source(std::wstring value); /*! The error description associated with the exception. * * @return The error description associated with the exception. */ std::wstring Description(); /*! The error description associated with the exception. * * @param value Error description associated with the exception. */ void Description(std::wstring value); /*! The error time stamp associated with the exception. * * @return The error time stamp associated with the exception. */ std::wstring TimeStamp(); /*! The error time stamp associated with the exception. * * @param value Error time stamp associated with the exception. */ void TimeStamp(std::wstring value); /*! The error source file name associated with the exception. * * @return The source file name associated with the exception. */ std::string SourceFile(); /*! The error source file name associated with the exception. * * @param value Source file name associated with the exception. 
*/ void SourceFile(std::string value); /*! The error source line number associated with the exception. * * @return The source line number associated with the exception. */ u_int16_t LineNumber(); /*! The error source line number associated with the exception. * * @param value Source line number associated with the exception. */ void LineNumber(u_int16_t value); /*! The error code associated with the exception. * * @return The error code associated with the exception. */ u_int64_t ErrorCode(); /*! The error code associated with the exception. * * @param value Error code associated with the exception. */ void ErrorCode(u_int64_t value); /*! Returns a string describing the exception * * @return Returns a string describing the exception. */ const std::wstring ToString(); /*! Format source file, line and time stamp. * * @return Returns a string combining source file, line number and time stamp. */ std::wstring SystemInfo(); }; } #endif // _ExceptionBase_HEADER_ affxparser/src/fusion/calvin_files/exception/src/InterpretationException.cpp0000644000175200017520000000210714516003651030645 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/exception/src/InterpretationException.h" // namespace affymetrix_calvin_exceptions { const std::wstring FormatException::ToString() { return L"affymetrix_calvin_exceptions::FormatException thrown."; } } affxparser/src/fusion/calvin_files/exception/src/InterpretationException.h0000644000175200017520000000305714516003651030317 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _InterpretationException_HEADER_ #define _InterpretationException_HEADER_ #include "calvin_files/exception/src/ExceptionBase.h" // /*! \file InterpretationException.h This file defines exceptions that are the result * of the system not being able to interpret input. 
*/ namespace affymetrix_calvin_exceptions { class FormatException : public CalvinException { public: FormatException() : CalvinException() {} FormatException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; } #endif affxparser/src/fusion/calvin_files/fusion/0000755000175200017520000000000014516003651021771 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/fusion/src/0000755000175200017520000000000014516022540022556 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/fusion/src/CalvinAdapter/0000755000175200017520000000000014516022540025273 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/fusion/src/CalvinAdapter/CalvinCELDataAdapter.cpp0000644000175200017520000003063514516003651031643 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/CalvinAdapter/CalvinCELDataAdapter.h" // #include "calvin_files/data/src/GenericDataTypes.h" #include "calvin_files/parameter/src/CELAlgorithmParameterNames.h" #include "calvin_files/parsers/src/CelFileReader.h" #include "calvin_files/utils/src/StringUtils.h" // #include // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; /* * Constructor */ CalvinCELDataAdapter::CalvinCELDataAdapter() { } /* * Destructor */ CalvinCELDataAdapter::~CalvinCELDataAdapter() { calvinCel.Clear(); } /* * Can this object read the file */ bool CalvinCELDataAdapter::CanReadFile() { CelFileReader reader; try { reader.Read(calvinCel); } catch(...) { return false; } return true; } /* * Return the generic data object. 
*/ GenericData *CalvinCELDataAdapter::GetGenericData() { return &calvinCel.GetGenericData(); } /* */ void CalvinCELDataAdapter::SetFileName(const std::string& value) { calvinCel.SetFilename(value); } /* */ std::string CalvinCELDataAdapter::GetFileName() const { return calvinCel.GetFilename(); } /* */ AffymetrixGuidType CalvinCELDataAdapter::GetFileId() { return calvinCel.GetFileHeader()->GetGenericDataHdr()->GetFileId(); } /* */ std::wstring CalvinCELDataAdapter::GetParams() { std::wstring params; ParameterNameValueTypeVector algParams; calvinCel.GetAlgorithmParameters(algParams); for (ParameterNameValueTypeIt ii = algParams.begin(); ii!= algParams.end(); ++ii) { if (ii != algParams.begin()) params += L";"; params += ii->GetName(); params += L":"; params += ii->ToString(); } return params; } /* */ std::wstring CalvinCELDataAdapter::GetAlgorithmParameter(const wchar_t *tag) { std::wstring result; ParameterNameValueType param; if (calvinCel.FindAlgorithmParameter(tag, param)) { result = param.ToString(); } return result; } /* */ std::wstring CalvinCELDataAdapter::GetAlgorithmParameterTag(int index) { ParameterNameValueTypeVector algParams; calvinCel.GetAlgorithmParameters(algParams); std::wstring result; try { result = algParams.at(index).GetName(); } catch(...) 
{ } return result; } /* */ int CalvinCELDataAdapter::GetNumberAlgorithmParameters() { ParameterNameValueTypeVector algParams; calvinCel.GetAlgorithmParameters(algParams); return (int)algParams.size(); } /* */ std::wstring CalvinCELDataAdapter::GetAlgorithmParameters() { return GetParams(); } /* */ void CalvinCELDataAdapter::GetParameters(FusionTagValuePairTypeList& values) { ParameterNameValueTypeVector algParams; calvinCel.GetAlgorithmParameters(algParams); for (ParameterNameValueTypeIt ii = algParams.begin(); ii!= algParams.end(); ++ii) { FusionTagValuePairType ft; ft.Tag = ii->GetName(); ft.Value = ii->ToString(); ft.DetailedType() = *ii; values.push_back(ft); } } /* */ std::wstring CalvinCELDataAdapter::GetDatHeader() { std::wstring datHeader; //GenDataHdrVectorIt begin, end; GenericDataHeader* gdh = calvinCel.GetFileHeader()->GetGenericDataHdr()->FindParent(SCAN_ACQUISITION_DATA_TYPE); if (gdh) { // found the right header, now look for the parameter ParameterNameValueType nvt; if (gdh->FindNameValParam(DAT_HEADER_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::TextType) datHeader = nvt.GetValueText(); } else if (gdh->FindNameValParam(PARTIAL_DAT_HEADER_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::TextType) { std::wstring partialDatHeader = nvt.GetValueText(); u_int16_t min = 0; u_int16_t max = 0; // Find the max and min parameters and append to the string. 
if (gdh->FindNameValParam(MAX_PIXEL_INTENSITY_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::UInt16Type) max = nvt.GetValueUInt16(); } if (gdh->FindNameValParam(MIN_PIXEL_INTENSITY_PARAM_NAME, nvt)) { if (nvt.GetParameterType() == ParameterNameValueType::UInt16Type) min = nvt.GetValueUInt16(); } wchar_t buf[30]=L""; FormatString2(buf, 30, L"[%d..%d]", min, max); datHeader = buf; datHeader += partialDatHeader; } } } return datHeader; } /* */ int CalvinCELDataAdapter::GetCellMargin() { ParameterNameValueType nvt; if (calvinCel.FindAlgorithmParameter(CELLMARGIN_PARAM_NAME, nvt)) { switch(nvt.GetParameterType()) { case ParameterNameValueType::Int32Type: return nvt.GetValueInt32(); break; case ParameterNameValueType::Int16Type: return (int)nvt.GetValueInt16(); break; case ParameterNameValueType::Int8Type: return (int)nvt.GetValueInt8(); break; case ParameterNameValueType::UInt32Type: return (int)nvt.GetValueUInt32(); break; case ParameterNameValueType::UInt16Type: return (int)nvt.GetValueUInt16(); break; case ParameterNameValueType::UInt8Type: return (int)nvt.GetValueUInt8(); break; case ParameterNameValueType::AsciiType: return (int)atoi(nvt.GetValueAscii().c_str()); default: return 0; break; } } else { return 0; } } unsigned int CalvinCELDataAdapter::GetNumOutliers() { XYCoordVector coords; calvinCel.GetOutlierCoords(coords); return (unsigned int)coords.size(); } unsigned int CalvinCELDataAdapter::GetNumMasked() { XYCoordVector coords; calvinCel.GetMaskedCoords(coords); return (unsigned int)coords.size(); } /* * Get the grid coordinates. 
*/ affymetrix_fusion_io::FGridCoords CalvinCELDataAdapter::GetGridCorners() { affymetrix_fusion_io::FGridCoords zeroGrid; affymetrix_fusion_io::FGridCoords grid; ParameterNameValueType nvt; if (calvinCel.FindAlgorithmParameter(GRIDULX_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.upperleft.x = nvt.GetValueFloat(); } else { return zeroGrid; } if (calvinCel.FindAlgorithmParameter(GRIDULY_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.upperleft.y = nvt.GetValueFloat(); } else { return zeroGrid; } if (calvinCel.FindAlgorithmParameter(GRIDURX_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.upperright.x = nvt.GetValueFloat(); } else { return zeroGrid; } if (calvinCel.FindAlgorithmParameter(GRIDURY_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.upperright.y = nvt.GetValueFloat(); } else { return zeroGrid; } if (calvinCel.FindAlgorithmParameter(GRIDLRX_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.lowerright.x = nvt.GetValueFloat(); } else { return zeroGrid; } if (calvinCel.FindAlgorithmParameter(GRIDLRY_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.lowerright.y = nvt.GetValueFloat(); } else { return zeroGrid; } if (calvinCel.FindAlgorithmParameter(GRIDLLX_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.lowerleft.x = nvt.GetValueFloat(); } else { return zeroGrid; } if (calvinCel.FindAlgorithmParameter(GRIDLLY_PARAM_NAME, nvt) && nvt.GetParameterType() == ParameterNameValueType::FloatType) { grid.lowerleft.y = nvt.GetValueFloat(); } else { return zeroGrid; } return grid; } // Index/position conversions /* */ int CalvinCELDataAdapter::IndexToX(int index) { return index % calvinCel.GetCols(); } /* */ int CalvinCELDataAdapter::IndexToY(int index) { return index / calvinCel.GetCols(); } /* */ 
int CalvinCELDataAdapter::XYToIndex(int x, int y) { return y*calvinCel.GetCols() + x; } // Accessors for intensity information. /* */ void CalvinCELDataAdapter::GetEntry(int index, FusionCELFileEntryType &entry) { float intensity; float stdev; int16_t numPixels; bool outlier, masked; calvinCel.GetData(index, intensity, stdev, numPixels, outlier, masked); entry.Intensity = (float)intensity; entry.Stdv = stdev; entry.Pixels = numPixels; } /* */ void CalvinCELDataAdapter::GetEntry(int x, int y, FusionCELFileEntryType &entry) { int index = XYToIndex(x,y); float intensity; float stdev; int16_t numPixels; bool outlier, masked; calvinCel.GetData(index, intensity, stdev, numPixels, outlier, masked); entry.Intensity = (float)intensity; entry.Stdv = stdev; entry.Pixels = numPixels; } /* */ float CalvinCELDataAdapter::GetIntensity(int index) { // allocate a vector, fill it with one item FloatVector v; calvinCel.GetIntensities(index, 1, v); // and return that one item. return v.at(0); } /* */ int CalvinCELDataAdapter::GetIntensities(int index,std::vector& intensities) { // Pass the vector along to get filled. calvinCel.GetIntensities(index, intensities.size(), intensities); // no errors for now. 
return 0; } /* */ float CalvinCELDataAdapter::GetIntensity(int x, int y) { int index = XYToIndex(x,y); FloatVector v; calvinCel.GetIntensities(index, 1, v); return v.at(0); } /* */ float CalvinCELDataAdapter::GetStdv(int index) { FloatVector v; calvinCel.GetStdev(index, 1, v); return v.at(0); } /* */ float CalvinCELDataAdapter::GetStdv(int x, int y) { int index = XYToIndex(x,y); FloatVector v; calvinCel.GetStdev(index, 1, v); return v.at(0); } /* */ short CalvinCELDataAdapter::GetPixels(int index) { Int16Vector v; calvinCel.GetNumPixels(index, 1, v); return v.at(0); } /* */ short CalvinCELDataAdapter::GetPixels(int x, int y) { int index = XYToIndex(x,y); Int16Vector v; calvinCel.GetNumPixels(index, 1, v); return v.at(0); } // Accessors for the mask/outlier flags /* */ bool CalvinCELDataAdapter::IsMasked(int x, int y) { BoolVector v; int32_t index = XYToIndex(x, y); if (calvinCel.GetMasked(index, 1, v)) return v.at(0); else return false; } /* */ bool CalvinCELDataAdapter::IsMasked(int index) { BoolVector v; if (calvinCel.GetMasked(index, 1, v)) return v.at(0); else return false; } /* */ bool CalvinCELDataAdapter::IsOutlier(int x, int y) { BoolVector v; int32_t index = XYToIndex(x, y); if (calvinCel.GetOutliers(index, 1, v)) return v.at(0); else return false; } /* */ bool CalvinCELDataAdapter::IsOutlier(int index) { BoolVector v; if (calvinCel.GetOutliers(index, 1, v)) return v.at(0); else return false; } bool CalvinCELDataAdapter::ReadHeader() { CelFileReader reader; try { reader.Read(calvinCel); } catch(...) { return false; } return true; } /* */ bool CalvinCELDataAdapter::Read(bool) { CelFileReader reader; try { reader.Read(calvinCel); } catch(...) 
{ return false; } return true; } /* */ bool CalvinCELDataAdapter::ReadEx(const char *filename, int /*state*/) { calvinCel.SetFilename(filename); return Read(false); } /* * Returns the list of parameters associated with a data set */ ParameterNameValueTypeList CalvinCELDataAdapter::GetDataSetParameters(const std::wstring &setName) { return calvinCel.GetDataSetParameters(setName); } /* Sets the active data group for a multi-group CEL file. Default is the first group. */ void CalvinCELDataAdapter::SetActiveDataGroup(const std::wstring &groupName) { calvinCel.SetActiveChannel(groupName); } /* Is this a multi-color CEL file? */ bool CalvinCELDataAdapter::IsMultiColor() { return calvinCel.IsMultiColor(); } /* Returns a list of the channel (ie data group) names */ WStringVector CalvinCELDataAdapter::GetChannels() { return calvinCel.GetChannels(); } /* */ //void CalvinCELDataAdapter::SetDimensions(int rows, int cols) //{ // calvinCel.SetRows(rows); // calvinCel.SetCols(cols); //} /* */ //void CalvinCELDataAdapter::AddAlgorithmParameter(const wchar_t *tag, const wchar_t *value) //{ // ParameterNameValueType nvt; // nvt.SetName(tag); // nvt.SetValueText(value); // calvinCel.AddAlgorithmParameter(nvt); //} affxparser/src/fusion/calvin_files/fusion/src/CalvinAdapter/CalvinCELDataAdapter.h0000644000175200017520000002636314516003651031313 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixCalvinCELDataAdapter_HEADER_ #define _AffymetrixCalvinCELDataAdapter_HEADER_ /*! \file CalvinCELDataAdapter.h This file defines the Calvin Fusion CEL Data adapter classes */ #include "calvin_files/data/src/CELData.h" #include "calvin_files/fusion/src/FusionCELDataAdapterInterface.h" // namespace affymetrix_fusion_io { //////////////////////////////////////////////////////////////////// /*! */ class CalvinCELDataAdapter : public IFusionCELDataAdapter { public: /*! \brief Constructor */ CalvinCELDataAdapter(); /*! \brief Destructor */ virtual ~CalvinCELDataAdapter(); public: /*! \brief Can this object read the file * \return If the cell file can be read. */ virtual bool CanReadFile(); /*! \brief Set the cell file name. * \param value The cell file name to be set */ virtual void SetFileName(const std::string& value); /*! \brief Get the cell file name. * \return The currently set cell file name. */ virtual std::string GetFileName() const; /*! \brief Get the id of the file (only valid for Command Console "calvin" files) * \return The unique file id. */ virtual affymetrix_calvin_utilities::AffymetrixGuidType GetFileId(); /*! \brief Set the error message. * \param value The error message to be set. */ virtual void SetError(const wchar_t* value) {} // do nothing /*! \brief Get the currently set error message string. * \return The error message. */ virtual std::wstring GetError() { return L""; } /*! \brief Get the header key. * \return The header key. */ virtual std::wstring GetHeaderKey(const wchar_t* key) { return L""; } /*! \brief Get the version of the cell file. * \return The cell file version. */ virtual int GetVersion() { return calvinCel.GetVersion(); } /*! 
\brief Get the number of columns.
	 *  \return The number of columns
	 */
	virtual int GetCols() { return calvinCel.GetCols(); }

	/*! \brief Get the number of rows.
	 *  \return The number of rows.
	 */
	virtual int GetRows() { return calvinCel.GetRows(); }

	/*! \brief Get number of cells
	 *  \return The number of cells
	 */
	virtual int GetNumCells() { return calvinCel.GetNumCells(); }

	/*! \brief Get the header string.
	 *  \return Always an empty string in this adapter.
	 */
	virtual std::wstring GetHeader() { return L""; }

	/*! \brief Get the algorithm name.
	 *  \return The algorithm name.
	 */
	virtual std::wstring GetAlg() { return calvinCel.GetAlgorithmName(); }

	/*! \brief Get the algorithm version.
	 *  \return The algorithm version.
	 */
	virtual std::wstring GetAlgVer() { return calvinCel.GetAlgorithmVersion(); }

	/*! \brief Get the algorithm parameters as a single string.
	 *  \return The parameters used for creating the cell file, formatted as
	 *          "name:value" entries separated by ';'.
	 */
	virtual std::wstring GetParams();

	/*! \brief Get the value of a named algorithm parameter.
	 *  \param tag Parameter name associated with a parameter value.
	 *  \return The parameter value as a string, or an empty string if no
	 *          parameter with that name is found.
	 */
	virtual std::wstring GetAlgorithmParameter(const wchar_t *tag);

	/*! \brief Get the name (tag) of an algorithm parameter by position.
	 *  \param index The index to the parameter array.
	 *  \return The parameter name, or an empty string if the lookup fails
	 *          (for example when index is out of range).
	 */
	virtual std::wstring GetAlgorithmParameterTag(int index);

	/*! \brief Get number of parameters.
	 *  \return The number of parameters.
	 */
	virtual int GetNumberAlgorithmParameters() ;

	/*! \brief Get the algorithm parameters as a single string.
	 *  \return The same string as GetParams().
	 */
	virtual std::wstring GetAlgorithmParameters();

	/*! \brief Get parameters.
	 *  \param values Collection of name/value type parameter list; one entry
	 *         per algorithm parameter is appended to it.
	 */
	virtual void GetParameters(FusionTagValuePairTypeList& values);

	/*! \brief Get the DatHeader string.
	 *  Attempts to read the DatHeader parameter of the DAT generic data header.
	 *  If only the partial DAT header parameter is present, the result is
	 *  "[min..max]" (pixel intensity range) followed by the partial header.
	 *  \return The DatHeader parameter value if found, otherwise an empty string.
	 */
	virtual std::wstring GetDatHeader();

	/*! \brief Get chip type.
	 *  \return The chip type of the cell.
*/ virtual std::wstring GetChipType() { return calvinCel.GetArrayType(); } /*! \brief Get the library package * \return library package name */ std::wstring GetLibraryPackageName() { return calvinCel.GetLibraryPackageName(); } /*! \brief Get the master file * \return master file name */ std::wstring GetMasterFileName() { return calvinCel.GetMasterFileName(); } /*! \brief Get cell margin. \return The cell margin. */ virtual int GetCellMargin(); /*! \brief Get number of outliers. * \return The number of outliers. */ virtual unsigned int GetNumOutliers(); /*! \brief Get number of masked cells. * \return The number of masked cells. */ virtual unsigned int GetNumMasked(); /*! \brief Get the grid coordinates. * \return Returns the grid coordinates. */ virtual FGridCoords GetGridCorners(); // Index/position conversions /*! \brief Translate index to X. * \param index The index to translate for x. * \return The translated index to x value. */ virtual int IndexToX(int index); /*! \brief Translate index to Y. * \param index The index to translate for y. * \return The translated index to y value. */ virtual int IndexToY(int index); /*! \brief Translate X and Y to an index. * \param x X coordinate. * \param y Y coordinate. * \return The translated index from x and y. */ virtual int XYToIndex(int x, int y); // Accessors for intensity information. /*! \brief Get entry by index. * \param index Entry index. * \param entry Entry to be filled from index. */ virtual void GetEntry(int index, FusionCELFileEntryType &entry); /*! \brief Get entry by x and y. * \param x X position. * \param y Y position. * \param entry Entry to be filled from x and y. */ virtual void GetEntry(int x, int y, FusionCELFileEntryType &entry); /*! \brief Get intensity by index position. * \param index Location of intensity * \return The intensity value. */ virtual float GetIntensity(int index); /*! 
@brief Get a vector of intensities * @param index Index of first intensity * @param intensities vector of intensities to fill * @return non-zero on error. */ virtual int GetIntensities(int index,std::vector& intensities); /*! \brief Get intensity by x, y position. * \param x X position. * \param y Y position. * \return The intensity value. */ virtual float GetIntensity(int x, int y); /*! \brief Get standard deviation by index position. * \param index Location of stdv. * \return The standard deviation value. */ virtual float GetStdv(int index); /*! \brief Get standard deviation by x, y position. * \param x X position. * \param y Y position. * \return The standard deviation value. */ virtual float GetStdv(int x, int y); /*! \brief Get pixel by index position. * \param index Location of pixel. * \return The pixel value. */ virtual short GetPixels(int index); /*! \brief Get pixel x, y position. * \param x X position. * \param y Y position. * \return The pixel value. */ virtual short GetPixels(int x, int y); // Accessors for the mask/outlier flags /*! \brief Get masked x, y position. * \param x X position. * \param y Y position. * \return Is index position masked. */ virtual bool IsMasked(int x, int y); /*! \brief Check if masked by index position. * \param index Location to check. * \return Is index position masked. */ virtual bool IsMasked(int index); /*! \brief Check if outlier by index position. * \param x X position. * \param y Y position. * \return Is index position an outlier. */ virtual bool IsOutlier(int x, int y); /*! \brief Check if outlier by index position. * \param index Location to check. * \return Is index position an outlier. */ virtual bool IsOutlier(int index); // For reading a file. /*! Close the cell file. */ virtual void Close() {} /*! \brief Close cell file. */ virtual bool ReadHeader(); /*! \brief Read the cell file. * \param bIncludeMaskAndOutliers Flag indicates whether to include in the read, the reading of outliers and masked items. 
* \return If the read completed successfully. */ virtual bool Read(bool bIncludeMaskAndOutliers); /*! \brief read cell file. * * The state flag is used for GCOS files only. * * \param filename Cell file name to read. * \param state [=CEL_ALL] Reading state * \return If the read completed successfully. * \a nState can be one or combination of the following values:\n\n * CEL_ALL Read all information in file (default)\n * CEL_DATA Read header and intensities only\n * CEL_OUTLIER Read header, intensities and outliers\n * CEL_MASK Read header, intensities and masked cells\n\n */ virtual bool ReadEx(const char *filename, int state); /*! \brief Get the reading state * \return The reading state. */ virtual int GetReadState() { return 1; /* CEL_ALL, always*/} /*! \brief clears the members. */ virtual void Clear() { calvinCel.Clear(); } /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ virtual affymetrix_calvin_io::GenericData *GetGenericData(); /*! Returns the list of parameters associated with a data set. * @param setName The data set name * @return The list of parameters */ virtual affymetrix_calvin_parameter::ParameterNameValueTypeList GetDataSetParameters(const std::wstring &setName); /*! Sets the active data group for a multi-group CEL file. Default is the first group. */ virtual void SetActiveDataGroup(const std::wstring &groupName); /*! Is this a multi-color CEL file? * @return True if it is multi-color */ virtual bool IsMultiColor(); /*! Returns a list of the channel (ie data group) names * @return list of channel names */ virtual WStringVector GetChannels(); // Sets the data values /*! \brief Sets the name of the algorithm used to create the CEL file. * \param str The algorithm name. */ // virtual void SetAlgorithmName(const wchar_t *str) { calvinCel.SetAlgorithmName(str); } /*! \brief Add algorithm parameter. * \param tag Parameter name. * \param value Parameter value. 
*/ // virtual void AddAlgorithmParameter(const wchar_t *tag, const wchar_t *value); /*! \brief Set the number of rows and columns. * \param rows Number of rows. * \param cols Number of columns. */ // virtual void SetDimensions(int rows, int cols); /*! \brief Set the chip type. * \param str Chip type. */ // virtual void SetChipType(const wchar_t *str) { calvinCel.SetArrayType(str); } /*! \brief Set the margin. * \param margin Margin value to set. */ // virtual void SetMargin(int margin) {} protected: /*! The underlying data access object */ affymetrix_calvin_io::CelFileData calvinCel; }; } #endif //_AffymetrixCalvinCELDataAdapter_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/CalvinAdapter/CalvinCHPDataAdapter.cpp0000644000175200017520000003136214516003651031650 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/CalvinAdapter/CalvinCHPDataAdapter.h" // #include "calvin_files/utils/src/StringUtils.h" // using namespace affxchp; using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_io; /*! 
Constructor */ CalvinCHPHeaderAdapter::CalvinCHPHeaderAdapter(CHPData* chp) : calvinChp(chp) { } /*! Destructor */ CalvinCHPHeaderAdapter::~CalvinCHPHeaderAdapter() { } /*! Gets the assay type * @return The assay type (FusionExpression, FusionGenotyping, FusionResequencing, FusionUniversal, FusionUnknown) */ AssayType CalvinCHPHeaderAdapter::GetAssayType() const { AssayType result = FusionUnknown; std::string tmp = calvinChp->GetAssayType(); if(tmp == CHP_EXPRESSION_ASSAY_TYPE) { result = FusionExpression; } else if(tmp == CHP_GENOTYPING_ASSAY_TYPE) { result = FusionGenotyping; } else if(tmp == CHP_RESEQUENCING_ASSAY_TYPE) { result = FusionResequencing; } else if(tmp == CHP_UNIVERSAL_ASSAY_TYPE) { result = FusionUniversal; } return result; } /*! Gets the number of columns * @return The number of columns */ int CalvinCHPHeaderAdapter::GetCols() const { return calvinChp->GetCols(); } /*! Gets the number of rows * @return The number of rows */ int CalvinCHPHeaderAdapter::GetRows() const { return calvinChp->GetRows(); } /*! Gets the number of probesets * @return The number of probesets */ int CalvinCHPHeaderAdapter::GetNumProbeSets() const { return calvinChp->GetEntryCount(); } /*! Get the chip (array) type * @return The chip (array) type */ std::wstring CalvinCHPHeaderAdapter::GetChipType() const { return calvinChp->GetArrayType(); } /*! Gets the algorithm name * @return The algorithm name */ std::wstring CalvinCHPHeaderAdapter::GetAlgName() const { return calvinChp->GetAlgName(); } /*! Gets the algorithm version * @return The version */ std::wstring CalvinCHPHeaderAdapter::GetAlgVersion() const { return calvinChp->GetAlgVersion(); } /*! 
Gets the algorithm parameters * @return The number of feature columns */ void CalvinCHPHeaderAdapter::GetAlgorithmParameters(FusionTagValuePairTypeList& values) { ParameterNameValueTypeVector nvt=calvinChp->GetAlgParams(); values = Convert(nvt); } u_int32_t CalvinCHPHeaderAdapter::GetAlgorithmParameterCount() { return (u_int32_t)calvinChp->GetAlgParams().size(); } u_int32_t CalvinCHPHeaderAdapter::GetSummaryParameterCount() { return (u_int32_t)calvinChp->GetChipSums().size(); } FusionTagValuePairTypeList CalvinCHPHeaderAdapter::Convert(ParameterNameValueTypeVector& nvt) { FusionTagValuePairTypeList list; FusionTagValuePairType type; for(ParameterNameValueTypeVector::size_type i = 0; i < nvt.size(); ++i) { type.Tag = nvt[i].GetName(); type.Value = nvt[i].ToString(); type.DetailedType() = nvt[i]; list.push_back(type); } return list; } /*! Gets the summary parameters */ void CalvinCHPHeaderAdapter::GetSummaryParameters(FusionTagValuePairTypeList& values) { ParameterNameValueTypeVector nvt=calvinChp->GetChipSums(); values = Convert(nvt); } /*! Gets the parent CEL file * @return The parent CEL file */ std::wstring CalvinCHPHeaderAdapter::GetParentCellFile() const { return calvinChp->GetParentCell(); } /*! Gets the prog ID * @return The prog ID */ std::wstring CalvinCHPHeaderAdapter::GetProgID() const { return calvinChp->GetProgId(); } /*! Gets a specific algorithm parameter given a name/tag * @return The specific algorithm parameter given a name/tag */ std::wstring CalvinCHPHeaderAdapter::GetAlgorithmParameter(const wchar_t *tag) { std::wstring p = tag; return calvinChp->GetAlgParam(p).ToString(); } /*! Gets a specific summary parameter given a name/tag * @return The specific summary parameter given a name/tag */ std::wstring CalvinCHPHeaderAdapter::GetSummaryParameter(const wchar_t *tag) { std::wstring p = tag; return calvinChp->GetChipSum(p).ToString(); } /*! 
Gets the magic number * @return The magic number */ int CalvinCHPHeaderAdapter::GetMagic() const { return calvinChp->GetMagic(); } /*! Gets the version number * @return The version number */ int CalvinCHPHeaderAdapter::GetVersion() const { return calvinChp->GetVersion(); } /*! Gets the background zone information * @return The background zone information */ void CalvinCHPHeaderAdapter::GetBackgroundZoneInfo(BackgroundZoneInfo& info) { int32_t count = calvinChp->GetBackgroundZoneCnt(); info.number_zones = (int)count; info.smooth_factor = 0.0f; CHPBackgroundZoneVector zones; calvinChp->GetBackgroundZones(0,count,zones); CHPBackgroundZoneVector::iterator begin = zones.begin(); CHPBackgroundZoneVector::iterator end = zones.end(); // Get the smooth factor from the first zone. if (begin != end) info.smooth_factor = begin->GetSmoothFactor(); for(;begin != end; ++begin) { BackgroundZoneType bzt; bzt.background = begin->GetBackground(); bzt.centerx = begin->GetCenterX(); bzt.centery = begin->GetCenterY(); info.zones.push_back(bzt); } } /*! Gets the list of background zone positions and values * @return The list of background zone positions and values */ void CalvinCHPHeaderAdapter::GetBackgroundZones(BackgroundZoneTypeList& zones) { int32_t count = calvinChp->GetBackgroundZoneCnt(); CHPBackgroundZoneVector vec; calvinChp->GetBackgroundZones(0,count,vec); CHPBackgroundZoneVectorIt begin = vec.begin(), end = vec.end(); for(; begin != end; begin++) { BackgroundZoneType type; type.background = begin->GetBackground(); type.centerx = begin->GetCenterX(); type.centery = begin->GetCenterY(); zones.push_back(type); } } /*! 
Gets the background value for a given center coordinate * @return The background value for a given center coordinate */ void CalvinCHPHeaderAdapter::GetBackgroundZone(BackgroundZoneType& type, int x, int y) { CHPBackgroundZone zone; calvinChp->GetBackgroundZone(x,zone); type.background = zone.GetBackground(); type.centerx = zone.GetCenterX(); type.centery = zone.GetCenterY(); } ////////////////////////////// // data section /* * Constructor */ CalvinCHPDataAdapter::CalvinCHPDataAdapter() { header = new CalvinCHPHeaderAdapter(&calvinChp); } /* * Destructor */ CalvinCHPDataAdapter::~CalvinCHPDataAdapter() { calvinChp.Clear(); delete header; } /*! Accessors to header. * @return The header data object */ IFusionCHPHeaderAdapter& CalvinCHPDataAdapter::GetHeader() { return *header; } /*! Returns the expression probe set result * @param index The index to the result object of interest. * @param result The expression result. * @return True if the expression result was found. */ bool CalvinCHPDataAdapter::GetExpressionResults(int index, FusionExpressionProbeSetResults& result) { if(calvinChp.GetAssayType() == CHP_EXPRESSION_ASSAY_TYPE) { CHPExpressionEntry entry; // row is the same as index calvinChp.GetEntry(index, entry); result.SetDetectionPValue(entry.GetDetectionPValue()); /*! The signal value */ result.SetSignal(entry.GetSignal()); /*! The number of probe pairs in the set */ result.SetNumPairs(entry.GetNumPairs()); /*! The number of probe pairs used to calculate the signal value */ result.SetNumUsedPairs(entry.GetNumPairsUsed()); /*! The detection call */ result.SetDetection(entry.GetDetection()); /*! Flag indicating that comparison results exist */ result.SetHasCompResults(entry.GetHasComparisonData()); /*! The change p-value */ result.SetChangePValue(entry.GetChangePValue()); /*! The signal log ratio */ result.SetSignalLogRatio(entry.GetSigLogRatio()); /*! The signal log ratio low value */ result.SetSignalLogRatioLow(entry.GetSigLogRatioLo()); /*! 
The signal log ratio high value */ result.SetSignalLogRatioHigh(entry.GetSigLogRatioHi()); /*! The number of probe pairs in common between the experiment and baseline data */ result.SetNumCommonPairs(entry.GetCommonPairs()); /*! The change call */ result.SetChange(entry.GetChange()); return true; } return false; } /*! Returns the genotyping probe set result * @param index The index to the result object of interest. * @param result The genotyping result. * @return True if the genotyping result was found. */ bool CalvinCHPDataAdapter::GetGenotypingResults(int index, FusionGenotypeProbeSetResults& result) { if(calvinChp.GetAssayType() == CHP_GENOTYPING_ASSAY_TYPE) { CHPGenotypeEntry entry; calvinChp.GetEntry(index, entry); result.SetAlleleCall(entry.GetCall()); result.SetConfidence(entry.GetConfidence()); result.SetRAS1(entry.GetRAS1()); result.SetRAS2(entry.GetRAS2()); result.SetPValueAA(entry.GetAACall()); result.SetPValueAB(entry.GetABCall()); result.SetPValueBB(entry.GetBBCall()); result.SetPValueNoCall(entry.GetNoCall()); return true; } return false; } /*! Returns the universal (tag array) probe set result * @param index The index to the result object of interest. * @param The universal result. * @return True if the universal result was found. 
*/ bool CalvinCHPDataAdapter::GetUniversalResults(int index, FusionUniversalProbeSetResults& result) { if(calvinChp.GetAssayType() == CHP_UNIVERSAL_ASSAY_TYPE) { CHPUniversalEntry entry; calvinChp.GetEntry(index, entry); result.SetBackground(entry.GetBackground()); return true; } return false; } bool CalvinCHPDataAdapter::GetResequencingResults(FusionResequencingResults& results) { if(calvinChp.GetAssayType() == CHP_RESEQUENCING_ASSAY_TYPE) { CHPReseqEntry entry; int32_t sz = calvinChp.GetEntryCount(); results.ResizeCalledBases(sz); results.ResizeScores(sz); for(int i = 0; i < sz; i++) { calvinChp.GetEntry(i, entry); results.SetCalledBase(i, entry.call); results.SetScore(i, entry.score); } sz = calvinChp.GetForceCnt(); results.ResizeForceCalls(sz); CHPReseqForceCall f; for(int i = 0; i < sz; i++) { calvinChp.GetForceCall(i, f); FusionForceCallType fusionType(f.position, f.call, f.reason); results.SetForceCall(i, fusionType); } sz = calvinChp.GetOrigCnt(); results.ResizeOrigCalls(sz); CHPReseqOrigCall b; for(int i = 0; i < sz; i++) { calvinChp.GetOrigCall(i, b); FusionBaseCallType fusionType(b.position, b.call); results.SetOrigCall(i, fusionType); } return true; } return false; } // Functions to read file. bool CalvinCHPDataAdapter::Read() { CHPFileReader reader; try { reader.Read(calvinChp); } catch(...) { return false; } return true; } /*! Reads the header of the CHP file * @return True if successful */ bool CalvinCHPDataAdapter::ReadHeader() { CHPFileReader reader; try { reader.Read(calvinChp); } catch(...) { return false; } return true; } /*! Sets the file name. * @param name The full path to the CHP file */ void CalvinCHPDataAdapter::SetFileName(const std::string& value) { calvinChp.SetFilename(value); } /*! Gets the file name. * @return The full path to the CHP file. */ std::string CalvinCHPDataAdapter::GetFileName() const { return calvinChp.GetFilename(); } /*! 
Deallocates any memory used by the class object */ void CalvinCHPDataAdapter::Clear() { calvinChp.Clear(); } /* * Can this object read the file */ bool CalvinCHPDataAdapter::CanReadFile() { CHPFileReader reader; try { reader.Read(calvinChp); } catch(affymetrix_calvin_exceptions::InvalidFileTypeException&) { return false; } return true; } /* * Get the id of the file */ AffymetrixGuidType CalvinCHPDataAdapter::FileId() { return calvinChp.GetFileHeader()->GetGenericDataHdr()->GetFileId(); } /* * Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ GenericData *CalvinCHPDataAdapter::GetGenericData() { return &calvinChp.GetGenericData(); } /* * Get the probe set name */ std::string CalvinCHPDataAdapter::GetProbeSetName(int index) { if (calvinChp.GetAssayType() == CHP_EXPRESSION_ASSAY_TYPE) { CHPExpressionEntry entry; calvinChp.GetEntry(index, entry); return entry.GetProbeSetName(); } else if (calvinChp.GetAssayType() == CHP_GENOTYPING_ASSAY_TYPE) { CHPGenotypeEntry entry; calvinChp.GetEntry(index, entry); return entry.GetProbeSetName(); } return ""; } affxparser/src/fusion/calvin_files/fusion/src/CalvinAdapter/CalvinCHPDataAdapter.h0000644000175200017520000002022014516003651031304 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixCalvinCHPDataAdapter_HEADER_ #define _AffymetrixCalvinCHPDataAdapter_HEADER_ /*! \file CalvinCHPDataAdapter.h This file defines the Calvin Fusion CHP Data adapter classes. */ #include "calvin_files/data/src/CHPData.h" #include "calvin_files/data/src/CHPExpressionEntry.h" #include "calvin_files/data/src/CHPGenotypeEntry.h" #include "calvin_files/data/src/CHPUniversalEntry.h" #include "calvin_files/fusion/src/FusionCHPDataAdapterInterface.h" #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/parsers/src/CHPFileReader.h" // #include "file/CHPFileData.h" // #include #include // using namespace affymetrix_calvin_io; namespace affymetrix_fusion_io { /*! \brief Header class for the Calvin CHP adapter */ class CalvinCHPHeaderAdapter : public IFusionCHPHeaderAdapter { public: /*! \brief Constructor * \param chp Calvin CHP data object. */ CalvinCHPHeaderAdapter(CHPData* chp); /*! \brief Destructor */ ~CalvinCHPHeaderAdapter(); public: /*! \brief Get number of columns. * \return Number of columns. */ virtual int GetCols() const; /*! \brief Get number of rows. * \return Number of rows. */ virtual int GetRows() const; /*! \brief Get the number of probesets. * \return Number of probesets. */ virtual int GetNumProbeSets() const; /*! \brief Get assay type. * \return Assay Type. * * \a Assay Types * FusionExpression - Expression assay * FusionGenotyping - Genotyping assay * FusionResequencing - Resequencing assay * FusionUniversal - Tag (universal) assay * FusionUnknown - Unknown assay type */ virtual AssayType GetAssayType() const; /*! \brief Get chip type. * \return The chip type. */ virtual std::wstring GetChipType() const; /*! 
\brief Get algorithm name. * \return Algorithm name. */ virtual std::wstring GetAlgName() const; /*! \brief Get algorithm version. * \return Algorithm version number. */ virtual std::wstring GetAlgVersion() const; /*! \brief Gets the algorithm parameters. * \param values Name/Value type list to be filled. */ virtual void GetAlgorithmParameters(affymetrix_fusion_io::FusionTagValuePairTypeList& values); /*! \brief Gets the algorithm parameter count. * \return Number of algorithm parameters. */ virtual u_int32_t GetAlgorithmParameterCount(); /*! \brief Gets the summary parameter count. * \return Number of summary parameters. */ virtual u_int32_t GetSummaryParameterCount(); /*! \brief Gets summary parameters. * \param values Name/Value type list to be filled. */ virtual void GetSummaryParameters(affymetrix_fusion_io::FusionTagValuePairTypeList& values); /*! \brief Gets the parent CEL file. * \return Parent CEL file name. */ virtual std::wstring GetParentCellFile() const; /*! \brief Gets the prog ID. * \return Prog ID */ virtual std::wstring GetProgID() const; /*! \brief Gets a specific algorithm parameter given a name/tag. * \param tag Parameter name. * \return Specific algorithm parameter given a name/tag */ virtual std::wstring GetAlgorithmParameter(const wchar_t *tag); /*! \brief Gets a specific summary parameter given a name/tag. * \param tag Parameter name. * \return Specific summary parameter given a name/tag. */ virtual std::wstring GetSummaryParameter(const wchar_t *tag); /*! \brief Gets the background zone information. * \param info Background zone information to be filled. */ virtual void GetBackgroundZoneInfo(affxchp::BackgroundZoneInfo& info); /*! \brief Gets the list of background zone positions and values. * \param zones List of background zone positions and values to be filled. */ virtual void GetBackgroundZones(affxchp::BackgroundZoneTypeList& zones); /*! \brief Gets the background value for a given center coordinate. 
* \param type Background value for a given center coordinate to be filled. * \param x X position of zone. * \param y Y position of zone. NOTE(review): the implementation in CalvinCHPDataAdapter.cpp forwards only x and never reads y; confirm the intended lookup. */ virtual void GetBackgroundZone(affxchp::BackgroundZoneType& type, int x, int y); /*! \brief Gets the magic number. * \return Magic number. */ virtual int GetMagic() const; /*! \brief Gets the version number. * \return Version number */ virtual int GetVersion() const; private: /*! Converts from ParameterNameValueTypeVector to FusionTagValuePairTypeList. * \param nvt Parameter name value type pair vector * \return The vector converted to a FusionTagValuePairTypeList. */ FusionTagValuePairTypeList Convert(ParameterNameValueTypeVector& nvt); /*! A pointer to the root data access object */ CHPData* calvinChp; }; /*! \brief Data class for the Calvin CHP adapter */ class CalvinCHPDataAdapter : public IFusionCHPDataAdapter { public: /*! \brief Constructor */ CalvinCHPDataAdapter(); /*! \brief Destructor */ virtual ~CalvinCHPDataAdapter(); /*! \brief Accessors to header. * \return Header object */ virtual IFusionCHPHeaderAdapter& GetHeader(); /*! \brief Can this object read the file. * \return If the CHP file can be read. */ virtual bool CanReadFile(); /*! Get the probe set name (only valid for Command Console "calvin" files) * @param index The index to the result object of interest. * @return The probe set name. */ virtual std::string GetProbeSetName(int index); /*! \brief Returns the expression probe set result * \param index Index to the result object of interest. * \param result Expression result. * \return True if the expression result was found. */ virtual bool GetExpressionResults(int index, affymetrix_fusion_io::FusionExpressionProbeSetResults& result); /*! \brief Returns the genotyping probe set result * \param index Index to the result object of interest. * \param result Genotyping result. 
* \return True if the genotyping result was found. */ virtual bool GetGenotypingResults(int index, FusionGenotypeProbeSetResults& result); /*! 
\brief Returns the universal (tag array) probe set result * \param index Index to the result object of interest. * \param result Universal result. * \return True if the universal result was found. */ virtual bool GetUniversalResults(int index, FusionUniversalProbeSetResults& result); /*! \brief Gets resequencing results. * \param results Holds the resequencing results. * \return True if resequencing results were retrieved. */ virtual bool GetResequencingResults(FusionResequencingResults& results); /*! \brief Functions to read file. * \return True if the CHP file was read. */ virtual bool Read(); /*! \brief Reads the header of the CHP file. * \return True if successful */ virtual bool ReadHeader(); /*! \brief Sets the file name. * \param value Full path to the CHP file */ virtual void SetFileName(const std::string& value); /*! \brief Gets the file name. * \return Full path to the CHP file. */ virtual std::string GetFileName() const; /*! \brief Deallocates any memory used by the class object. */ virtual void Clear(); /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId(); /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData(); protected: /*! The underlying data access object */ CHPData calvinChp; /*! Header adapter (allocated in the constructor and deleted in the destructor) */ CalvinCHPHeaderAdapter* header; }; } #endif // _AffymetrixCalvinCHPDataAdapter_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionArrayFileReader.cpp0000644000175200017520000001103114516003651027445 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionArrayFileReader.h" // #include "calvin_files/parsers/src/ArrayFileReader.h" #include "calvin_files/utils/src/StringUtils.h" // #include "file/DttArrayFileReader.h" #include "file/EXPFileData.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_array; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_io; using namespace affymetrix_dttarray; using namespace affymetrix_calvin_parameter; using namespace affxexp; using namespace std; /* * Clear any members. */ FusionArrayFileReader::FusionArrayFileReader() { } /* * Clear any members. */ FusionArrayFileReader::~FusionArrayFileReader() { } /* * Read the file into the array data parameter. * First try the calvin array file, * next the DTT array file, * lastly the EXP file. * * The DTT array file will contain the probe array type and a list of * user attributes. Copy these attributes plus the array type to the * array data object. The probe array type will go in the physical * array section. * * The EXP array file will contain the probe array type and a list of * sample parameters. Copy these to the array data object. The probe * array type will go in the physical array section. * */ bool FusionArrayFileReader::Read(const string &fileName, ArrayData &arrayData) { arrayData.Clear(); // The calvin array file. 
ArrayFileReader arrayReader; if (ArrayFileReader::IsFileType(fileName, ARRAY_SET_FILE_TYPE_IDENTIFIER) == true) { if (arrayReader.Read(fileName, arrayData) == true) return true; } // The probe array type name. wstring probeArrayTypeName = L"Probe Array Type"; // The DTT or Exporter SDK MAGE-ML file. DttArrayData dttData; DttArrayFileReader dttReader; dttReader.SetFileName(fileName.c_str()); if (dttReader.Read(dttData) == true) { ParameterNameValueDefaultRequiredTypeList &userAtts = arrayData.UserAttributes(); ParameterNameValueDefaultRequiredType userAtt; AttributeNameValueTypeList &atts = dttData.Attributes(); int paramIndex=0; for (AttributeNameValueTypeList::iterator it=atts.begin(); it!=atts.end(); it++, paramIndex++) { AttributeNameValueType param = *it; userAtt.SetName(StringUtils::ConvertMBSToWCS(param.name)); userAtt.SetValueText(StringUtils::ConvertMBSToWCS(param.value)); userAtts.push_back(userAtt); } ArrayAttributesVector &physArrays = arrayData.PhysicalArraysAttributes(); physArrays.resize(1); ArrayAttributes &physArray = physArrays[0]; ParameterNameValuePair nameValueParam; nameValueParam.Name = probeArrayTypeName; nameValueParam.Value = StringUtils::ConvertMBSToWCS(dttData.GetArrayType()); physArray.Attributes().push_back(nameValueParam); return true; } // The MAS EXP file. 
CEXPFileData expReader; expReader.SetFileName(fileName.c_str()); if (expReader.Read() == true) { ParameterNameValueDefaultRequiredTypeList &userAtts = arrayData.UserAttributes(); ParameterNameValueDefaultRequiredType userAtt; TagValuePairTypeList &atts = expReader.GetSampleParameters(); int paramIndex=0; for (TagValuePairTypeList::iterator it=atts.begin(); it!=atts.end(); it++, paramIndex++) { TagValuePairType param = *it; userAtt.SetName(StringUtils::ConvertMBSToWCS(param.Tag)); userAtt.SetValueText(StringUtils::ConvertMBSToWCS(param.Value)); userAtts.push_back(userAtt); } ArrayAttributesVector &physArrays = arrayData.PhysicalArraysAttributes(); physArrays.resize(1); ArrayAttributes &physArray = physArrays[0]; ParameterNameValuePair nameValueParam; nameValueParam.Name = probeArrayTypeName; nameValueParam.Value = StringUtils::ConvertMBSToWCS(expReader.GetArrayType()); physArray.Attributes().push_back(nameValueParam); return true; } return false; } affxparser/src/fusion/calvin_files/fusion/src/FusionArrayFileReader.h0000644000175200017520000000441514516003651027122 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionArrayFile_HEADER_ #define _AffymetrixFusionArrayFile_HEADER_ /*! \file FusionArrayFileReader.h This file provides fusion classes for reading array files. */ #include "calvin_files/array/src/ArrayData.h" // #include #include // namespace affymetrix_fusion_io { /*! This class provides the fusion parsing interfaces for reading array files. * * The array files can be one of the following: * Calvin array set files * MAS5 EXP files * MAGE-ML XML files generated by the DTT version 1.1 software * MAGE-ML XML files generated by the GDAC Exporter SDK * * In all cases the parser will populate an array data object. */ class FusionArrayFileReader { protected: /*! An identifier to the type of data stored in the file */ affymetrix_calvin_utilities::AffymetrixGuidType dataTypeIdentifier; public: /*! Constructor */ FusionArrayFileReader(); /*! Destructor */ ~FusionArrayFileReader(); /*! Reads the entire contents of the file. * @param fileName The name of the array file to read. * @param arrayData The array data object to fill from the file; it is cleared before reading. * @return True if one of the supported parsers read the file, false otherwise. */ bool Read(const std::string &fileName, affymetrix_calvin_array::ArrayData &arrayData); /*! The identifier of the type of data stored in the file. * @return The identifier of the type of data. */ const affymetrix_calvin_utilities::AffymetrixGuidType &DataTypeIdentifier() const { return dataTypeIdentifier; } }; }; #endif // _AffymetrixFusionArrayFile_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionBARData.cpp0000644000175200017520000000166414516003651025655 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionBARData.h" // using namespace affymetrix_fusion_io; affxparser/src/fusion/calvin_files/fusion/src/FusionBARData.h0000644000175200017520000000225614516003651025320 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionBARData_HEADER_ #define _AffymetrixFusionBARData_HEADER_ /*! 
\file FusionBARData.h This file defines the Fusion BAR Data classes */ #include "file/BARFileData.h" // using namespace affxbar; namespace affymetrix_fusion_io { /*! Storage for the BAR file. */ class FusionBARFile : public CBARFileData { }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionBEDData.cpp0000644000175200017520000000166414516003651025643 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionBEDData.h" // using namespace affymetrix_fusion_io; affxparser/src/fusion/calvin_files/fusion/src/FusionBEDData.h0000644000175200017520000000225514516003651025305 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionBEDData_HEADER_ #define _AffymetrixFusionBEDData_HEADER_ /*! \file FusionBEDData.h This file defines the Fusion BED Data classes */ #include "file/BEDFileData.h" // using namespace affxbed; namespace affymetrix_fusion_io { /*! Storage for the BED file. */ class FusionBEDFile : public BEDFileData { }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionBPMAPData.cpp0000644000175200017520000000166614516003651026112 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionBPMAPData.h" // using namespace affymetrix_fusion_io; affxparser/src/fusion/calvin_files/fusion/src/FusionBPMAPData.h0000644000175200017520000000230014516003651025541 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionBPMAPData_HEADER_ #define _AffymetrixFusionBPMAPData_HEADER_ /*! \file FusionBPMAPData.h This file defines the Fusion BPMAP Data classes */ #include "file/BPMAPFileData.h" // using namespace affxbpmap; namespace affymetrix_fusion_io { /*! Storage for the BPMAP file. */ class FusionBPMAPFile : public CBPMAPFileData { }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCDFData.cpp0000644000175200017520000007026214516003651025645 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCDFData.h" // #include "calvin_files/fusion/src/FusionCDFQCProbeSetNames.h" #include "calvin_files/parsers/src/CDFFileReader.h" #include "calvin_files/parsers/src/GenericFileReader.h" #include "calvin_files/utils/src/FileUtils.h" #include "calvin_files/utils/src/StringUtils.h" // #include // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_parameter; //////////////////////////////////////////////////////////////////////////////////////////// /* * Get CDF Format Version. */ int FusionCDFFileHeader::GetFormatVersion() const { if (gcosHeader) return gcosHeader->GetFormatVersion(); else if (calvinData) return calvinData->GetFormatVersion(); else return 0; } /* * Get the CDF GUID. */ std::string FusionCDFFileHeader::GetGUID() const { if (gcosHeader) return gcosHeader->GetGUID(); else return std::string(""); } /* * Get the integrity md5. */ std::string FusionCDFFileHeader::GetIntegrityMd5() const { if (gcosHeader) return gcosHeader->GetIntegrityMd5(); else return std::string(""); } /* * Get the number of feature columns in the array. 
*/ int FusionCDFFileHeader::GetCols() const { if (gcosHeader) return gcosHeader->GetCols(); else if (calvinData) return calvinData->GetArrayCols(); else return 0; } /* * Get the number of feature rows in the array. */ int FusionCDFFileHeader::GetRows() const { if (gcosHeader) return gcosHeader->GetRows(); else if (calvinData) return calvinData->GetArrayRows(); else return 0; } /* * Get the number of probe sets. */ int FusionCDFFileHeader::GetNumProbeSets() const { if (gcosHeader) return gcosHeader->GetNumProbeSets(); else if (calvinData && calvinData->GetGenericData().Header().GetGenericDataHdr()->GetFileTypeId() != AFFY_CNTRL_PS) return calvinData->GetProbeSetCnt(); else return 0; } /* * Get the number of QC probe sets. */ int FusionCDFFileHeader::GetNumQCProbeSets() const { if (gcosHeader) return gcosHeader->GetNumQCProbeSets(); else if (calvinData && calvinData->GetGenericData().Header().GetGenericDataHdr()->GetFileTypeId() == AFFY_CNTRL_PS) return calvinData->GetProbeSetCnt(); else return 0; } /* * Get the reference sequence (for resequencing arrays only). */ std::string &FusionCDFFileHeader::GetReference() { return ref; } /* * Initializes the class based on GCOS information. */ void FusionCDFFileHeader::Initialize(affxcdf::CCDFFileData *data) { gcosHeader = &data->GetHeader(); calvinData = NULL; ref = gcosHeader->GetReference(); } /* * Initializes the class based on Calvin information. */ void FusionCDFFileHeader::Initialize(affymetrix_calvin_io::CDFData *data) { gcosHeader = NULL; calvinData = data; ref = calvinData->GetRefSequence(); } /* * Constructor */ FusionCDFFileHeader::FusionCDFFileHeader() { gcosHeader = NULL; calvinData = NULL; } //////////////////////////////////////////////////////////////////////////////////////////// /* * Initialize the class members. */ FusionCDFQCProbeInformation::FusionCDFQCProbeInformation() { calvinProbe = NULL; gcosProbe = NULL; } /* * Deallocate memory. 
*/ FusionCDFQCProbeInformation::~FusionCDFQCProbeInformation() { Clear(); } /* * Initialize the class members. */ void FusionCDFQCProbeInformation::Initialize(int index, affxcdf::CCDFQCProbeSetInformation *gcosSet) { Clear(); gcosProbe = new affxcdf::CCDFQCProbeInformation; gcosSet->GetProbeInformation(index, *gcosProbe); } /* * Initialize the class members. */ void FusionCDFQCProbeInformation::Initialize(int index, CDFQCProbeSetInformation *calvinSet) { Clear(); calvinProbe = new CDFQCProbeInformation; calvinSet->GetProbeInformation(index, *calvinProbe); } /* * Clears the members. */ void FusionCDFQCProbeInformation::Clear() { delete calvinProbe; calvinProbe = NULL; delete gcosProbe; gcosProbe = NULL; } /*! Gets the X cooridnate of the probe. */ int FusionCDFQCProbeInformation::GetX() const { if (gcosProbe) return gcosProbe->GetX(); else if (calvinProbe) return calvinProbe->GetX(); else return 0; } /*! Gets the Y cooridnate of the probe. */ int FusionCDFQCProbeInformation::GetY() const { if (gcosProbe) return gcosProbe->GetY(); else if (calvinProbe) return calvinProbe->GetY(); else return 0; } /*! Gets the probe length. */ int FusionCDFQCProbeInformation::GetPLen() const { if (gcosProbe) return gcosProbe->GetPLen(); else if (calvinProbe) return calvinProbe->GetPLen(); else return 0; } /*! Gets the flag indicating if the probe is a perfect match probe. */ bool FusionCDFQCProbeInformation::IsPerfectMatchProbe() const { if (gcosProbe) return gcosProbe->IsPerfectMatchProbe(); else if (calvinProbe) return calvinProbe->IsPerfectMatchProbe(); else return false; } /*! Gets a flag indicating if the probe is used for background calculations (blank feature). 
*/ bool FusionCDFQCProbeInformation::IsBackgroundProbe() const { if (gcosProbe) return gcosProbe->IsBackgroundProbe(); else if (calvinProbe) return calvinProbe->IsBackgroundProbe(); else return false; } //////////////////////////////////////////////////////////////////////////////////////////// /* * Get the probe set type. */ affxcdf::GeneChipQCProbeSetType FusionCDFQCProbeSetInformation::GetQCProbeSetType() const { if (gcosSet) return gcosSet->GetQCProbeSetType(); else if (calvinSet) { affxcdf::GeneChipQCProbeSetType qcType=affxcdf::UnknownQCProbeSetType; const std::wstring &qc = calvinSet->GetQCProbeSetType(); for (int i=(int)affxcdf::UnknownQCProbeSetType; i<=(int)affxcdf::SpatialNormalizationPositiveQCProbeSetType; i++) { if (qc == FusionCDFQCProbeSetNames::GetStaticCDFQCProbeSetName((affxcdf::GeneChipQCProbeSetType)i)) { qcType = (affxcdf::GeneChipQCProbeSetType)i; break; } } return qcType; } else return affxcdf::UnknownQCProbeSetType; } /* * Get the number of probes in the set. */ int FusionCDFQCProbeSetInformation::GetNumCells() const { if (gcosSet) return gcosSet->GetNumCells(); else if (calvinSet) return calvinSet->GetNumCells(); else return 0; } /* * Get the information about a single probe in the set. */ void FusionCDFQCProbeSetInformation::GetProbeInformation(int index, FusionCDFQCProbeInformation & info) { if (gcosSet) info.Initialize(index, gcosSet); else if (calvinSet) info.Initialize(index, calvinSet); else info.Clear(); } /* * Initialize the class. */ FusionCDFQCProbeSetInformation::FusionCDFQCProbeSetInformation() { gcosSet = NULL; calvinSet = NULL; } /* * Deallocate any used memory. */ FusionCDFQCProbeSetInformation::~FusionCDFQCProbeSetInformation() { Clear(); } /* * Create a GCOS QC probe set object and retrieve it from the CDF object. 
*/ void FusionCDFQCProbeSetInformation::Initialize(int index, affxcdf::CCDFFileData *cdf) { Clear(); gcosSet = new affxcdf::CCDFQCProbeSetInformation; cdf->GetQCProbeSetInformation(index, *gcosSet); } /* * Create a GCOS QC probe set object and retrieve it from the CDF object. */ void FusionCDFQCProbeSetInformation::Initialize(affxcdf::GeneChipQCProbeSetType qcType, affxcdf::CCDFFileData *cdf) { Clear(); gcosSet = new affxcdf::CCDFQCProbeSetInformation; cdf->GetQCProbeSetInformation(qcType, *gcosSet); } /* * Create a Calvin QC probe set object and retrieve it from the CDF object. */ void FusionCDFQCProbeSetInformation::Initialize(int index, CDFData *cdf) { Clear(); calvinSet = new CDFQCProbeSetInformation; cdf->GetQCProbeSetInformation(index, *calvinSet); } /* * Create a Calvin QC probe set object and retrieve it from the CDF object. */ void FusionCDFQCProbeSetInformation::Initialize(affxcdf::GeneChipQCProbeSetType qcType, CDFData *cdf) { Clear(); calvinSet = new CDFQCProbeSetInformation; cdf->GetQCProbeSetInformation(qcType, *calvinSet); } /* * Deallocate any used memory. */ void FusionCDFQCProbeSetInformation::Clear() { delete calvinSet; calvinSet = NULL; delete gcosSet; gcosSet = NULL; } //////////////////////////////////////////////////////////////////////////////////////////// /* * Return the list index. */ int FusionCDFProbeInformation::GetListIndex() const { if (gcosProbe) return gcosProbe->GetListIndex(); else if (calvinProbe) return calvinProbe->GetListIndex(); else return 0; } /* * Return the expos value. */ int FusionCDFProbeInformation::GetExpos() const { if (gcosProbe) return gcosProbe->GetExpos(); else if (calvinProbe) return calvinProbe->GetExpos(); else return 0; } /* * Return the X coordinate. */ int FusionCDFProbeInformation::GetX() const { if (gcosProbe) return gcosProbe->GetX(); else if (calvinProbe) return calvinProbe->GetX(); else return 0; } /* * Return the Y coordinate. 
*/ int FusionCDFProbeInformation::GetY() const { if (gcosProbe) return gcosProbe->GetY(); else if (calvinProbe) return calvinProbe->GetY(); else return 0; } /* * Return the probes base at the interrogation position. */ char FusionCDFProbeInformation::GetPBase() const { if (gcosProbe) return gcosProbe->GetPBase(); else if (calvinProbe) return (char) calvinProbe->GetPBase(); else //return NULL; return 0; } /* * Return the targets base at the interrogation position. */ char FusionCDFProbeInformation::GetTBase() const { if (gcosProbe) return gcosProbe->GetTBase(); else if (calvinProbe) return (char) calvinProbe->GetTBase(); else //return NULL; return 0; } /* * Return the probe length. */ unsigned short FusionCDFProbeInformation::GetProbeLength() const { if (gcosProbe) return gcosProbe->GetProbeLength(); else if (calvinProbe) return calvinProbe->GetProbeLength(); else return 0; } /* * Return the probe grouping. */ unsigned short FusionCDFProbeInformation::GetProbeGrouping() const { if (gcosProbe) return gcosProbe->GetProbeGrouping(); else if (calvinProbe) return calvinProbe->GetProbeGrouping(); else return 0; } /* * Initialize the class members. */ FusionCDFProbeInformation::FusionCDFProbeInformation() { gcosProbe = NULL; calvinProbe = NULL; } /* * Deallocate any memory. */ FusionCDFProbeInformation::~FusionCDFProbeInformation() { Clear(); } /* * Initialize the class members. */ void FusionCDFProbeInformation::Initialize(int index, affxcdf::CCDFProbeGroupInformation *gcosGroup) { Clear(); gcosProbe = new affxcdf::CCDFProbeInformation; gcosGroup->GetCell(index, *gcosProbe); } /* * Initialize the class members. */ void FusionCDFProbeInformation::Initialize(int index, affymetrix_calvin_io::CDFProbeGroupInformation *calvinGroup) { Clear(); calvinProbe = new CDFProbeInformation; calvinGroup->GetCell(index, *calvinProbe); } /* * Clears the members. 
*/ void FusionCDFProbeInformation::Clear() { delete calvinProbe; calvinProbe = NULL; delete gcosProbe; gcosProbe = NULL; } //////////////////////////////////////////////////////////////////////////////////////////// /* * Get the groups direction. */ affxcdf::DirectionType FusionCDFProbeGroupInformation::GetDirection() const { if (gcosGroup) return gcosGroup->GetDirection(); else if (calvinGroup) { DirectionType dir = calvinGroup->GetDirection(); switch (dir) { case ProbeNoDirection: return affxcdf::NoDirection; break; case ProbeSenseDirection: return affxcdf::SenseDirection; break; case ProbeAntiSenseDirection: return affxcdf::AntiSenseDirection; break; case ProbeEitherDirection: return affxcdf::EitherDirection; break; default: return affxcdf::NoDirection; break; } } else return affxcdf::NoDirection; } /* * Get the number of lists (atoms) in the group. */ int FusionCDFProbeGroupInformation::GetNumLists() const { if (gcosGroup) return gcosGroup->GetNumLists(); else if (calvinGroup) return calvinGroup->GetNumLists(); else return 0; } /* * Get the number of probes in the group. */ int FusionCDFProbeGroupInformation::GetNumCells() const { if (gcosGroup) return gcosGroup->GetNumCells(); else if (calvinGroup) return calvinGroup->GetNumCells(); else return 0; } /* * Get the number of probes per list. */ int FusionCDFProbeGroupInformation::GetNumCellsPerList() const { if (gcosGroup) return gcosGroup->GetNumCellsPerList(); else if (calvinGroup) return calvinGroup->GetNumCellsPerList(); else return 0; } /* * Get the start list index value. */ int FusionCDFProbeGroupInformation::GetStart() const { if (gcosGroup) return gcosGroup->GetStart(); else if (calvinGroup) { CDFProbeInformation probeInfo; calvinGroup->GetCell(0, probeInfo); return probeInfo.GetListIndex(); } else return 0; } /* * Get the stop list index value. 
*/ int FusionCDFProbeGroupInformation::GetStop() const { if (gcosGroup) return gcosGroup->GetStop(); else if (calvinGroup) { CDFProbeInformation probeInfo; calvinGroup->GetCell(calvinGroup->GetNumCells()-1, probeInfo); return probeInfo.GetListIndex(); } else return 0; } /* * Get the group name. */ std::string FusionCDFProbeGroupInformation::GetName() const { if (gcosGroup) return gcosGroup->GetName(); else if (calvinGroup) return StringUtils::ConvertWCSToMBS(calvinGroup->GetName()); else return ""; } /* * Get the wobble situation. */ unsigned short FusionCDFProbeGroupInformation::GetWobbleSituation() const { if (gcosGroup) return gcosGroup->GetWobbleSituation(); else if (calvinGroup) return calvinGroup->GetWobbleSituation(); else return 0; } /* * Get the allele code. */ unsigned short FusionCDFProbeGroupInformation::GetAlleleCode() const { if (gcosGroup) return gcosGroup->GetAlleleCode(); else if (calvinGroup) return calvinGroup->GetAlleleCode(); else return 0; } /* * Get the channel. */ unsigned char FusionCDFProbeGroupInformation::GetChannel() const { if (gcosGroup) return gcosGroup->GetChannel(); else if (calvinGroup) return calvinGroup->GetChannel(); else return 0; } /* * Get the probe replication type. */ affxcdf::ReplicationType FusionCDFProbeGroupInformation::GetRepType() const { if (gcosGroup) return gcosGroup->GetRepType(); else if (calvinGroup) { ReplicationType rep = calvinGroup->GetRepType(); switch (rep) { case UnknownProbeRepType: return affxcdf::UnknownRepType; break; case DifferentProbeRepType: return affxcdf::DifferentRepType; break; case MixedProbeRepType: return affxcdf::MixedRepType; break; case IdenticalProbeRepType: return affxcdf::IdenticalRepType; break; default: return affxcdf::UnknownRepType; break; } } else return affxcdf::UnknownRepType; } /* * Retrieve the probe object given the index. 
*/ void FusionCDFProbeGroupInformation::GetCell(int cell_index, FusionCDFProbeInformation & info) { if (gcosGroup) info.Initialize(cell_index, gcosGroup); else if (calvinGroup) info.Initialize(cell_index, calvinGroup); else info.Clear(); } /* * Initialize the numbers to NULL values. */ FusionCDFProbeGroupInformation::FusionCDFProbeGroupInformation() { gcosGroup = NULL; calvinGroup = NULL; } /* * Deallocate any used memory. */ FusionCDFProbeGroupInformation::~FusionCDFProbeGroupInformation() { Clear(); } /* * Deallocate any used memory. */ void FusionCDFProbeGroupInformation::Clear() { delete calvinGroup; calvinGroup = NULL; delete gcosGroup; gcosGroup = NULL; } /* * Get the group information. */ void FusionCDFProbeGroupInformation::Initialize(int index, affxcdf::CCDFProbeSetInformation *gcosSet) { Clear(); gcosGroup = new affxcdf::CCDFProbeGroupInformation; gcosSet->GetGroupInformation(index, *gcosGroup); } /* * Get the group information. */ void FusionCDFProbeGroupInformation::Initialize(int index, CDFProbeSetInformation *calvinSet) { Clear(); calvinGroup = new CDFProbeGroupInformation; calvinSet->GetGroupInformation(index, *calvinGroup); } //////////////////////////////////////////////////////////////////////////////////////////// /* * Initialize the variables to NULL values. */ FusionCDFProbeSetInformation::FusionCDFProbeSetInformation() { gcosSet = NULL; calvinSet = NULL; } /* * Deallocate member variables. */ FusionCDFProbeSetInformation::~FusionCDFProbeSetInformation() { Clear(); } /* * Deallocate member variables. */ void FusionCDFProbeSetInformation::Clear() { delete gcosSet; gcosSet = NULL; delete calvinSet; calvinSet = NULL; } /* * Initializes the class for GCOS file reading. */ void FusionCDFProbeSetInformation::Initialize(int index, affxcdf::CCDFFileData *cdf) { Clear(); gcosSet = new affxcdf::CCDFProbeSetInformation; cdf->GetProbeSetInformation(index, *gcosSet); } /* * Initializes the class for Calvin file reading. 
*/ void FusionCDFProbeSetInformation::Initialize(int index, CDFData *cdf) { Clear(); calvinSet = new CDFProbeSetInformation; cdf->GetProbeSetInformation(index, *calvinSet); } /* * Gets the probe set type. */ affxcdf::GeneChipProbeSetType FusionCDFProbeSetInformation::GetProbeSetType() { if (gcosSet) return gcosSet->GetProbeSetType(); else if (calvinSet) { CDFDataTypeIds psType = calvinSet->GetProbeSetType(); switch (psType) { case Expression: return affxcdf::ExpressionProbeSetType; break; case Genotyping: return affxcdf::GenotypingProbeSetType; break; case Tag: return affxcdf::TagProbeSetType; break; case Resequencing: return affxcdf::ResequencingProbeSetType; break; case CopyNumber: return affxcdf::CopyNumberProbeSetType; break; case GenotypeControl: return affxcdf::GenotypeControlProbeSetType; break; case ExpressionControl: return affxcdf::ExpressionControlProbeSetType; break; case Marker: return affxcdf::MarkerProbeSetType; break; case MultichannelMarker: return affxcdf::MultichannelMarkerProbeSetType; break; default: return affxcdf::UnknownProbeSetType; break; } } else return affxcdf::UnknownProbeSetType; } /* * Get the probe sets direction. */ affxcdf::DirectionType FusionCDFProbeSetInformation::GetDirection() const { if (gcosSet) return gcosSet->GetDirection(); else if (calvinSet) { DirectionType dir = calvinSet->GetDirection(); switch (dir) { case ProbeNoDirection: return affxcdf::NoDirection; break; case ProbeSenseDirection: return affxcdf::SenseDirection; break; case ProbeAntiSenseDirection: return affxcdf::AntiSenseDirection; break; case ProbeEitherDirection: return affxcdf::EitherDirection; break; default: return affxcdf::NoDirection; break; } } else return affxcdf::NoDirection; } /* * Get the number of lists (atoms) in the group. */ int FusionCDFProbeSetInformation::GetNumLists() const { if (gcosSet) return gcosSet->GetNumLists(); else if (calvinSet) return calvinSet->GetNumLists(); else return 0; } /* * Get the number of groups in the set. 
*/ int FusionCDFProbeSetInformation::GetNumGroups() const { if (gcosSet) return gcosSet->GetNumGroups(); else if (calvinSet) return calvinSet->GetNumGroups(); else return 0; } /* * Get the number of probes in the set. */ int FusionCDFProbeSetInformation::GetNumCells() const { if (gcosSet) return gcosSet->GetNumCells(); else if (calvinSet) return calvinSet->GetNumCells(); else return 0; } /* * Get the number of probes per list. */ int FusionCDFProbeSetInformation::GetNumCellsPerList() const { if (gcosSet) return gcosSet->GetNumCellsPerList(); else if (calvinSet) return calvinSet->GetNumCellsPerList(); else return 0; } /* * Get the probe set number. */ int FusionCDFProbeSetInformation::GetProbeSetNumber() const { if (gcosSet) return gcosSet->GetProbeSetNumber(); else if (calvinSet) return calvinSet->GetProbeSetNumber(); else return 0; } /* * Get the group object. */ void FusionCDFProbeSetInformation::GetGroupInformation(int index, FusionCDFProbeGroupInformation & info) { if (gcosSet) info.Initialize(index, gcosSet); else if (calvinSet) info.Initialize(index, calvinSet); else info.Clear(); } //////////////////////////////////////////////////////////////////////////////////////////// /* * Check the type and create the appropriate parser. */ void FusionCDFData::CreateObject() { Close(); if (FusionCDFData::IsCalvinCompatibleFile(fileName.c_str()) == false) { gcosData = new affxcdf::CCDFFileData; } else { calvinData = new affymetrix_calvin_io::CDFData; } } /* * Constructor */ FusionCDFData::FusionCDFData() { gcosData = NULL; calvinData = NULL; } /* * Destructor */ FusionCDFData::~FusionCDFData() { Close(); } /* * Set the name of the file. */ void FusionCDFData::SetFileName(const char *name) { fileName = name; } /* * Get the name of the file. */ std::string FusionCDFData::GetFileName() const { return fileName; } /* * Get the header object. 
*/ FusionCDFFileHeader &FusionCDFData::GetHeader() { if (gcosData) { header.Initialize(gcosData); } else if (calvinData) { header.Initialize(calvinData); } return header; } /* * Get the GUID of the CDF file. * This only applies to CDF XDA format version >= 4. */ std::string FusionCDFData::GetGUID() { if (!gcosData && !calvinData) CreateObject(); if (gcosData) { gcosData->SetFileName(fileName.c_str()); return gcosData->GetGUID(); } else return std::string(""); } /* * Get the integrity md5 of the CDF file. */ std::string FusionCDFData::GetIntegrityMd5() { if (!gcosData && !calvinData) CreateObject(); if (gcosData) { gcosData->SetFileName(fileName.c_str()); return gcosData->GetIntegrityMd5(); } else return std::string(""); } /* * Get the error string. */ std::string FusionCDFData::GetError() const { if (gcosData) return gcosData->GetError(); else return ""; } /* * Get the name of a probe set. */ std::string FusionCDFData::GetProbeSetName(int index) const { if (gcosData) return gcosData->GetProbeSetName(index); else if (calvinData) return StringUtils::ConvertWCSToMBS(calvinData->GetProbeSetName(index)); else return std::string(""); } /* * Get the chip type (probe array type) of the CDF file. * This is the name of the file without extension for CDF XDA format version < 4. * For CDF XDA format version >= 4, this is array name without version. */ std::string FusionCDFData::GetChipType() { if (!gcosData && !calvinData) CreateObject(); if (gcosData) { gcosData->SetFileName(fileName.c_str()); return gcosData->GetChipType(); } else { int index = (int) fileName.rfind('\\'); if (index == -1) index = (int) fileName.rfind('/'); std::string chiptype = fileName.c_str() + index + 1; chiptype.resize(chiptype.length()-4); return chiptype; } } /* * Get the chip type (probe array type) of the CDF file. * This is the name of the file without extension for CDF XDA format version < 4. We * also include all substrings create by removing * characters to the right of each '.' 
* For CDF XDA format version >= 4, this is retrieved from the file header. */ std::vector FusionCDFData::GetChipTypes() { if (!gcosData && !calvinData) CreateObject(); if (gcosData) { gcosData->SetFileName(fileName.c_str()); return gcosData->GetChipTypes(); } else { std::vector chiptypes; std::string chiptype; int index = (int) fileName.rfind('\\'); if (index == -1) index = (int) fileName.rfind('/'); chiptype = fileName.c_str() + index + 1; chiptype.resize(chiptype.length()-4); // The full file name (minus .cdf extension) is the default (1st) // chip type. This matches what GetChipType() returns. // ie: foo.bar.v1.r2.cdf -> foo.bar.v1.r2 chiptypes.push_back(chiptype); //We then add all substrings starting at zero and ending at '.' // ie: foo.bar.v1.r2.cdf -> foo.bar.v1, foo.bar, foo std::string::size_type pos = chiptype.rfind(".",chiptype.size()-1); while (pos != std::string::npos){ if(pos>0) chiptypes.push_back(chiptype.substr(0,pos)); pos = chiptype.rfind(".",pos-1); } //ie: foo.bar.v1.r2, foo.bar.v1, foo.bar, foo return chiptypes; } } /* * Read the entire file. */ bool FusionCDFData::Read() { CreateObject(); if (gcosData) { gcosData->SetFileName(fileName.c_str()); return gcosData->Read(); } else { CDFFileReader reader; reader.SetFilename(fileName); try { reader.Read(*calvinData); } catch(...) { return false; } return true; } } /* * Read the header of the file only. Read a calvin file in its entirety - this really * reads the header until the data groups are opened. */ bool FusionCDFData::ReadHeader() { CreateObject(); if (gcosData) { gcosData->SetFileName(fileName.c_str()); return gcosData->ReadHeader(); } else { CDFFileReader reader; reader.SetFilename(fileName); try { reader.Read(*calvinData); } catch(...) { return false; } return true; } } /* * Check if the file exists. */ bool FusionCDFData::Exists() { return FileUtils::Exists(fileName.c_str()); } /*! Deallocates memory and closes any file handles. 
*/ void FusionCDFData::Close() { if (gcosData) { gcosData->Close(); delete gcosData; gcosData = NULL; } if (calvinData) { delete calvinData; calvinData = NULL; } } /* * Determines if a CDF file is of the XDA (binary) format. */ bool FusionCDFData::IsXDACompatibleFile(const char *fileName) { affxcdf::CCDFFileData cdf; cdf.SetFileName(fileName); return cdf.IsXDACompatibleFile(); } /* * Determines if a CDF file is of the Calvin format. */ bool FusionCDFData::IsCalvinCompatibleFile(const char *fileName) { GenericData data; GenericFileReader reader; reader.SetFilename(fileName); try { reader.ReadHeader(data, GenericFileReader::ReadNoDataGroupHeader); return true; } catch (affymetrix_calvin_exceptions::CalvinException) { } return false; } /* * Get the probe set type for non-qc probe sets. */ affxcdf::GeneChipProbeSetType FusionCDFData::GetProbeSetType(int index) const { if (gcosData) { return gcosData->GetProbeSetType(index); } else if (calvinData && calvinData->GetGenericData().Header().GetGenericDataHdr()->GetFileTypeId() != AFFY_CNTRL_PS) { std::string dataTypeId = calvinData->GetDataTypeId(); if (dataTypeId == AFFY_EXPR_PS) return affxcdf::ExpressionProbeSetType; else if (dataTypeId == AFFY_GENO_PS) return affxcdf::GenotypingProbeSetType; else if (dataTypeId == AFFY_RESEQ_PS) return affxcdf::ResequencingProbeSetType; else if (dataTypeId == AFFY_TAG_PS) return affxcdf::TagProbeSetType; else return affxcdf::UnknownProbeSetType; } else return affxcdf::UnknownProbeSetType; } /* * Get the probe set information. */ void FusionCDFData::GetProbeSetInformation(int index, FusionCDFProbeSetInformation & info) { if (gcosData) { info.Initialize(index, gcosData); } else if (calvinData && calvinData->GetGenericData().Header().GetGenericDataHdr()->GetFileTypeId() != AFFY_CNTRL_PS) { info.Initialize(index, calvinData); } else info.Clear(); } /* * Get the QC probe set information by index. 
*/ void FusionCDFData::GetQCProbeSetInformation(int index, FusionCDFQCProbeSetInformation & info) { if (gcosData) { info.Initialize(index, gcosData); } else if (calvinData && calvinData->GetGenericData().Header().GetGenericDataHdr()->GetFileTypeId() == AFFY_CNTRL_PS) { info.Initialize(index, calvinData); } else info.Clear(); } /* * Get the QC probe set information by type. */ void FusionCDFData::GetQCProbeSetInformation(affxcdf::GeneChipQCProbeSetType qcType, FusionCDFQCProbeSetInformation & info) { if (gcosData) { info.Initialize(qcType, gcosData); } else if (calvinData && calvinData->GetGenericData().Header().GetGenericDataHdr()->GetFileTypeId() == AFFY_CNTRL_PS) { info.Initialize(qcType, calvinData); } else info.Clear(); } affxparser/src/fusion/calvin_files/fusion/src/FusionCDFData.h0000644000175200017520000004004514516003651025306 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionCDFData_HEADER_ #define _AffymetrixFusionCDFData_HEADER_ /*! 
\file FusionCDFData.h This file defines the Fusion CDF Data classes */ #include "calvin_files/data/src/CDFData.h" #include "calvin_files/data/src/CDFProbeGroupInformation.h" #include "calvin_files/data/src/CDFProbeInformation.h" #include "calvin_files/data/src/CDFQCProbeInformation.h" #include "calvin_files/data/src/CDFQCProbeSetInformation.h" // #include "file/CDFFileData.h" // namespace affymetrix_fusion_io { /*! Storage for the header in a CDF file. The object is a view over either a GCOS header or a Calvin data object: each Initialize() overload sets one pointer and resets the other to NULL. No destructor is declared, so the pointed-to objects are not released by this class. */ class FusionCDFFileHeader { protected: /*! The GCOS CDF file header object (NULL when attached to Calvin data). */ affxcdf::CCDFFileHeader *gcosHeader; /*! The Calvin CDF file object (NULL when attached to a GCOS header). */ affymetrix_calvin_io::CDFData *calvinData; /*! The reference sequence, copied from the backing object by Initialize(). */ std::string ref; /*! Initializes the class based on GCOS information. * @param data The GCOS CDF file object. */ void Initialize(affxcdf::CCDFFileData *data); /*! Initializes the class based on Calvin information. * @param data The Calvin CDF file object. */ void Initialize(affymetrix_calvin_io::CDFData *data); /*! Friend to the parent. */ friend class FusionCDFData; public: /*! Get CDF Format Version * @return CDF Format Version (0 when nothing is attached) */ int GetFormatVersion() const; /*! Get GUID * @return GUID (empty string unless a GCOS header is attached) */ std::string GetGUID() const; /*! Get integrity md5 * @return integrity md5 (empty string unless a GCOS header is attached) */ std::string GetIntegrityMd5() const; /*! Gets the number of feature columns in the array. * @return The number of columns. */ int GetCols() const; /*! Gets the number of feature rows in the array. * @return The number of rows. */ int GetRows() const; /*! Gets the number of probe sets. * @return The number of probe sets (0 for Calvin data whose file type id is AFFY_CNTRL_PS). */ int GetNumProbeSets() const; /*! Gets the number of QC probe sets. * @return The number of QC probe sets (for Calvin data, non-zero only when the file type id is AFFY_CNTRL_PS). */ int GetNumQCProbeSets() const; /*! Gets the reference sequence (for resequencing arrays only). * @return The reference sequence. */ std::string &GetReference(); /*! Constructor; both backing pointers start as NULL. */ FusionCDFFileHeader(); }; /*!
This class provides storage for an individual probe in a CDF file. It holds at most one backing probe object (GCOS or Calvin), allocated by Initialize() and released by Clear(). NOTE(review): the class owns these pointers but declares no copy constructor or assignment operator; confirm instances are never copied while a probe is loaded. */ class FusionCDFProbeInformation { private: /*! The GCOS probe object (NULL unless loaded from a GCOS group). */ affxcdf::CCDFProbeInformation *gcosProbe; /*! The Calvin probe object (NULL unless loaded from a Calvin group). */ affymetrix_calvin_io::CDFProbeInformation *calvinProbe; /*! Initializes the class for GCOS file reading. * @param index The index to the probe. * @param gcosGroup The GCOS probe group object. */ void Initialize(int index, affxcdf::CCDFProbeGroupInformation *gcosGroup); /*! Initializes the class for Calvin file reading. * @param index The index to the probe. * @param calvinGroup The Calvin probe group object. */ void Initialize(int index, affymetrix_calvin_io::CDFProbeGroupInformation *calvinGroup); /*! Deletes both probe objects and resets the pointers to NULL. */ void Clear(); /*! Friend to the parent class. */ friend class FusionCDFProbeGroupInformation; public: /*! Returns the list index. * @return The list index (0 when no probe is loaded). */ int GetListIndex() const; /*! Returns the expos value. * @return The expos value (0 when no probe is loaded). */ int GetExpos() const; /*! Returns the X coordinate. * @return The X coordinate (0 when no probe is loaded). */ int GetX() const; /*! Returns the Y coordinate. * @return The Y coordinate (0 when no probe is loaded). */ int GetY() const; /*! Returns the probe's base at the interrogation position. * @return The probe's base at the interrogation position (0 when no probe is loaded). */ char GetPBase() const; /*! Returns the target's base at the interrogation position. * @return The target's base at the interrogation position (0 when no probe is loaded). */ char GetTBase() const; /*! Returns the probe length. * @return The probe length (0 when no probe is loaded). */ unsigned short GetProbeLength() const; /*! Returns the probe grouping. * @return The probe grouping (0 when no probe is loaded). */ unsigned short GetProbeGrouping() const; /*! Constructor; both probe pointers start as NULL. */ FusionCDFProbeInformation(); /*! Destructor; releases the loaded probe through Clear(). */ ~FusionCDFProbeInformation(); }; /*! This class provides storage for a group of probes, also known as a block. */ class FusionCDFProbeGroupInformation { private: /*! The GCOS probe group object (NULL unless loaded from a GCOS probe set). */ affxcdf::CCDFProbeGroupInformation *gcosGroup; /*! 
The Calvin probe group object. */ affymetrix_calvin_io::CDFProbeGroupInformation *calvinGroup; /*! Initializes the class for GCOS file reading. * @param index The index to the probe group. * @param gcosSet The GCOS probe set object. */ void Initialize(int index, affxcdf::CCDFProbeSetInformation *gcosSet); /*! Initializes the class for Calvin file reading. * @param index The index to the probe group. * @param calvinSet The Calvin probe set object. */ void Initialize(int index, affymetrix_calvin_io::CDFProbeSetInformation *calvinSet); /*! Deallocates any used memory. */ void Clear(); /*! Friend to the parent class. */ friend class FusionCDFProbeSetInformation; public: /*! Gets the group's direction. * @return The group's direction. */ affxcdf::DirectionType GetDirection() const; /*! Gets the number of lists (atoms) in the group. * @return The number of lists (atoms) in the group. */ int GetNumLists() const; /*! Gets the number of probes in the group. * @return The number of probes in the group. */ int GetNumCells() const; /*! Gets the number of probes per list. * @return The number of probes per list. */ int GetNumCellsPerList() const; /*! Gets the start list index value. * @return The start list index value. */ int GetStart() const; /*! Gets the stop list index value. * @return The stop list index value. */ int GetStop() const; /*! Gets the group name. * @return The group name. */ std::string GetName() const; /*! Gets the wobble situation. * @return The wobble situation. */ unsigned short GetWobbleSituation() const; /*! Gets the allele code. * @return The allele code. */ unsigned short GetAlleleCode() const; /*! Gets the channel. * @return The channel. */ unsigned char GetChannel() const; /*! Gets the probe replication type. * @return The probe replication type. */ affxcdf::ReplicationType GetRepType() const; /*! Retrieves the probe object given an index. * @param cell_index Index to the probe of interest. * @param info The returned probe data. 
*/ void GetCell(int cell_index, FusionCDFProbeInformation & info); /*! Constructor */ FusionCDFProbeGroupInformation(); /*! Destructor */ ~FusionCDFProbeGroupInformation(); }; /*! This class provides storage for a probe set. */ class FusionCDFProbeSetInformation { private: /*! The GCOS probe set object. */ affxcdf::CCDFProbeSetInformation *gcosSet; /*! The Calvin probe set object. */ affymetrix_calvin_io::CDFProbeSetInformation *calvinSet; /*! Initializes the class for GCOS file reading. * @param index The index to the probe set. * @param cdf The GCOS CDF file object. */ void Initialize(int index, affxcdf::CCDFFileData *cdf); /*! Initializes the class for Calvin file reading. * @param index The index to the probe set. * @param cdf The Calvin CDF file object. */ void Initialize(int index, affymetrix_calvin_io::CDFData *cdf); /*! Deallocates any used memory. */ void Clear(); /*! Friend to the parent class. */ friend class FusionCDFData; public: /*! Gets the probe set type. * @return The probe set type. */ affxcdf::GeneChipProbeSetType GetProbeSetType(); /*! Gets the probe set's direction. * @return The probe set's direction. */ affxcdf::DirectionType GetDirection() const; /*! Gets the number of lists (atoms) in the group. * @return The number of lists (atoms) in the group. */ int GetNumLists() const; /*! The number of groups in the set. * @return The number of groups in the set. */ int GetNumGroups() const; /*! The number of probes in the set. * @return The number of probes in the set. */ int GetNumCells() const; /*! Gets the number of probes per list. * @return The number of probes per list. */ int GetNumCellsPerList() const; /*! Gets the probe set number. * @return The probe set number. */ int GetProbeSetNumber() const; /*! Gets a group object. * @param index The index to the group of interest. * @param info The returned group data. */ void GetGroupInformation(int index, FusionCDFProbeGroupInformation & info); /*! Constructor */ FusionCDFProbeSetInformation(); /*! 
Destructor */ ~FusionCDFProbeSetInformation(); }; /*! This class provides storage for QC probes */ class FusionCDFQCProbeInformation { private: /*! The GCOS probe object. */ affxcdf::CCDFQCProbeInformation *gcosProbe; /*! The Calvin probe object. */ affymetrix_calvin_io::CDFQCProbeInformation *calvinProbe; /*! Initializes the class for GCOS file reading. * @param index The index to the probe. * @param gcosSet The GCOS QC probe set. */ void Initialize(int index, affxcdf::CCDFQCProbeSetInformation *gcosSet); /*! Initializes the class for Calvin file reading. * @param index The index to the probe. * @param calvinSet The Calvin QC probe set. */ void Initialize(int index, affymetrix_calvin_io::CDFQCProbeSetInformation *calvinSet); /*! Deallocates any used memory. */ void Clear(); /*! Friend to the parent class. */ friend class FusionCDFQCProbeSetInformation; public: /*! Constructor */ FusionCDFQCProbeInformation(); /*! Destructor */ ~FusionCDFQCProbeInformation(); /*! Gets the X coordinate of the probe. * @return The X coordinate. */ int GetX() const; /*! Gets the Y coordinate of the probe. * @return The Y coordinate. */ int GetY() const; /*! Gets the probe length. * @return The probe length. This value may be 1 for non-synthesized features. */ int GetPLen() const; /*! Gets the flag indicating if the probe is a perfect match probe. * @return The flag indicating if the probe is a perfect match probe */ bool IsPerfectMatchProbe() const; /*! Gets a flag indicating if the probe is used for background calculations (blank feature). * @return Flag indicating if the probe is used for background calculations (blank feature). */ bool IsBackgroundProbe() const; }; /*! This class provides storage for the probes in a QC probe set. */ class FusionCDFQCProbeSetInformation { private: /*! The GCOS probe set object. */ affxcdf::CCDFQCProbeSetInformation *gcosSet; /*! The Calvin probe set object. */ affymetrix_calvin_io::CDFQCProbeSetInformation *calvinSet; /*! 
Initializes the class for GCOS file reading, selecting the QC probe set by index. * @param index The index to the QC probe set. * @param cdf The GCOS CDF file object. */ void Initialize(int index, affxcdf::CCDFFileData *cdf); /*! Initializes the class for GCOS file reading, selecting the QC probe set by type. * @param qcType The type of QC probe set. * @param cdf The GCOS CDF file object. */ void Initialize(affxcdf::GeneChipQCProbeSetType qcType, affxcdf::CCDFFileData *cdf); /*! Initializes the class for Calvin file reading, selecting the QC probe set by index. * @param index The index to the QC probe set. * @param cdf The Calvin CDF file object. */ void Initialize(int index, affymetrix_calvin_io::CDFData *cdf); /*! Initializes the class for Calvin file reading, selecting the QC probe set by type. * @param qcType The type of QC probe set. * @param cdf The Calvin CDF file object. */ void Initialize(affxcdf::GeneChipQCProbeSetType qcType, affymetrix_calvin_io::CDFData *cdf); /*! Deallocates any used memory. */ void Clear(); /*! Friend to the parent class. */ friend class FusionCDFData; public: /*! Gets the probe set type. * @return The probe set type. */ affxcdf::GeneChipQCProbeSetType GetQCProbeSetType() const; /*! Gets the number of probes in the set. * @return The number of probes in the set. */ int GetNumCells() const; /*! Gets the information about a single probe in the set. * @param index The index to the probe of interest. * @param info The information about the probe. */ void GetProbeInformation(int index, FusionCDFQCProbeInformation & info); /*! Constructor */ FusionCDFQCProbeSetInformation(); /*! Destructor */ ~FusionCDFQCProbeSetInformation(); }; /*! This defines the combined GCOS/Calvin CDF data interaction class. */ class FusionCDFData { protected: /*! The GCOS CDF file object. */ affxcdf::CCDFFileData *gcosData; /*! The header object. */ FusionCDFFileHeader header; /*! The Calvin CDF file object. */ affymetrix_calvin_io::CDFData *calvinData; /*! The name of the file to read. */ std::string fileName; /*! Creates either the GCOS or Calvin parser object. */ void CreateObject(); public: /*! 
Constructor */ FusionCDFData(); /*! Destructor */ ~FusionCDFData(); /*! Sets the name of the file. * @param name The full path of the CDF file. */ void SetFileName(const char *name); /*! Gets the name of the file. * @return The full path of the CDF file. */ std::string GetFileName() const; /*! Gets the header object. * @return The CDF file header object. */ FusionCDFFileHeader &GetHeader(); /*! Get GUID * @return GUID */ std::string GetGUID(); /*! Get integrity md5 * @return integrity md5 */ std::string GetIntegrityMd5(); /*! Gets the error string. * @return A string describing the last read error. */ std::string GetError() const; /*! Gets the name of a probe set. * @param index The index to the probe set name of interest. * @return The probe set name. */ std::string GetProbeSetName(int index) const; /*! Gets the chip type (probe array type) of the CDF file. * @return The chip type. This is just the name (without extension) of the CDF file. */ std::string GetChipType(); /*! Gets the chip types (probe array type) of the CDF file. Allows substrings delimited by '.' * @return vector of chip types. NOTE(review): the vector's element type is not visible in this declaration (the template argument appears to have been lost); presumably std::string, matching GetChipType() - confirm. */ std::vector GetChipTypes(); /*! Reads the entire file. * @return True if successful. */ bool Read(); /*! Reads the header of the file only. * @return True if successful. */ bool ReadHeader(); /*! Checks if the file exists. * @return True if the file exists. */ bool Exists(); /*! Deallocates memory and closes any file handles. */ void Close(); /*! Determines if a CDF file is of the XDA (binary) format. * @param fileName The name of the file to test. * @return True if XDA format. */ static bool IsXDACompatibleFile(const char *fileName); /*! Determines if a CDF file is of the Calvin format. * @param fileName The name of the file to test. * @return True if Calvin format. */ static bool IsCalvinCompatibleFile(const char *fileName); /*! Gets the probe set type for non-qc probe sets. * @param index The index to the probe set of interest. * @return The type of probe set. 
*/ affxcdf::GeneChipProbeSetType GetProbeSetType(int index) const; /*! Gets the probe set information. * @param index The index to the probe set of interest. * @param info The probe set information (passed by reference; the function itself returns nothing). */ void GetProbeSetInformation(int index, FusionCDFProbeSetInformation & info); /*! Gets the QC probe set information by index. * @param index The index to the QC probe set of interest. * @param info The QC probe set information (passed by reference; the function itself returns nothing). */ void GetQCProbeSetInformation(int index, FusionCDFQCProbeSetInformation & info); /*! Gets the QC probe set information by type. * @param qcType The type of QC probe set to retrieve. * @param info The QC probe set information (passed by reference; the function itself returns nothing). */ void GetQCProbeSetInformation(affxcdf::GeneChipQCProbeSetType qcType, FusionCDFQCProbeSetInformation & info); }; } #endif //_AffymetrixFusionCDFData_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionCDFQCProbeSetNames.cpp0000644000175200017520000000557514516003651027734 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCDFQCProbeSetNames.h" // using namespace affxcdf; using namespace affymetrix_fusion_io; /*! Returns the probe set name for a control (QC) probe set type. * The switch below needs one case for each value * defined in the GeneChipQC enumerant (in the GCOS File SDK CDFFileData.h file); * any value without a case yields the same name as UnknownQCProbeSetType. */ std::wstring FusionCDFQCProbeSetNames::GetStaticCDFQCProbeSetName(GeneChipQCProbeSetType qcType) { switch (qcType) { case UnknownQCProbeSetType: return L"UnknownQC"; break; case CheckerboardNegativeQCProbeSetType: return L"CheckerboardNegativeQC"; break; case CheckerboardPositiveQCProbeSetType: return L"CheckerboardPositiveQC"; break; case HybNegativeQCProbeSetType: return L"HybNegativeQC"; break; case HybPositiveQCProbeSetType: return L"HybPositiveQC"; break; case TextFeaturesNegativeQCProbeSetType: return L"TextFeaturesNegativeQC"; break; case TextFeaturesPositiveQCProbeSetType: return L"TextFeaturesPositiveQC"; break; case CentralNegativeQCProbeSetType: return L"CentralNegativeQC"; break; case CentralPositiveQCProbeSetType: return L"CentralPositiveQC"; break; case GeneExpNegativeQCProbeSetType: return L"GeneExpNegativeQC"; break; case GeneExpPositiveQCProbeSetType: return L"GeneExpPositiveQC"; break; case CycleFidelityNegativeQCProbeSetType: return L"CycleFidelityNegativeQC"; break; case CycleFidelityPositiveQCProbeSetType: return L"CycleFidelityPositiveQC"; break; case CentralCrossNegativeQCProbeSetType: return L"CentralCrossNegativeQC"; break; case CentralCrossPositiveQCProbeSetType: return L"CentralCrossPositiveQC"; break; case CrossHybNegativeQCProbeSetType: return L"CrossHybNegativeQC"; break; case CrossHybPositiveQCProbeSetType: return 
L"CrossHybPositiveQC"; break; case SpatialNormalizationNegativeQCProbeSetType: return L"SpatialNormalizationNegativeQC"; break; case SpatialNormalizationPositiveQCProbeSetType: return L"SpatialNormalizationPositiveQC"; break; default: return L"UnknownQC"; break; } } affxparser/src/fusion/calvin_files/fusion/src/FusionCDFQCProbeSetNames.h0000644000175200017520000000274614516003651027376 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionCDFQCProbeSetNames_HEADER_ #define _FusionCDFQCProbeSetNames_HEADER_ /*! \file FusionCDFQCProbeSetNames.h This file defines the QC probe set names for the GCOS QC probe sets. */ #include "file/CDFFileData.h" // #include #include // namespace affymetrix_fusion_io { /*! This class provides the QC probe set names. */ class FusionCDFQCProbeSetNames { public: /*! The probe set names for the control probe sets. * @param qcType The type of qc probe set. 
* @return The probe set name as a wide string. */ static std::wstring GetStaticCDFQCProbeSetName(affxcdf::GeneChipQCProbeSetType qcType); }; } #endif //_FusionCDFQCProbeSetNames_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionCELData.cpp0000644000175200017520000003000114516003651025637 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCELData.h" // #include "calvin_files/fusion/src/CalvinAdapter/CalvinCELDataAdapter.h" #include "calvin_files/fusion/src/GCOSAdapter/GCOSCELDataAdapter.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/utils/src/FileUtils.h" // #include "util/Fs.h" // #include #include #include // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_parameter; /* * Default constructor: starts with no adapter and marks the parameter list as not yet read. */ FusionCELData::FusionCELData() { adapter = 0; parameterListRead = false; } /* * Destructor: clears the members, then deletes the adapter. */ FusionCELData::~FusionCELData() { Clear(); DeleteAdapter(); } /* * Set the file name. 
*/ void FusionCELData::SetFileName(const char *str) { filename = str; } /* * Get the file name. */ std::string FusionCELData::GetFileName() const { return filename; } /* * Get the file id. */ AffymetrixGuidType FusionCELData::GetFileId() const { CheckAdapter(); return adapter->GetFileId(); } /* * Get the generic data object. */ GenericData *FusionCELData::GetGenericData() { CheckAdapter(); return adapter->GetGenericData(); } /*! Maps X/Y coordinates to CEL file index. * @param x The x coordinate. * @param y The y coordinate. * @param r The number of rows. * @param c The number of columns. * @return The index to the intensity arrays. */ int FusionCELData::XYToIndex(int x, int y, int r, int c) { return ((y*c) + x); } /* * Returns the list of parameters associated with a data set, empty for GCOS files */ ParameterNameValueTypeList FusionCELData::GetDataSetParameters(const std::wstring &setName) { CheckAdapter(); return adapter->GetDataSetParameters(setName); } /* * Set the error string. */ void FusionCELData::SetError(const wchar_t* value) { CheckAdapter(); adapter->SetError(value); } /* * Retrieve error string. */ std::wstring FusionCELData::GetError() { CheckAdapter(); return adapter->GetError(); } std::wstring FusionCELData::GetHeaderKey(const wchar_t* key) { CheckAdapter(); return adapter->GetHeaderKey(key); } /* * Retrieve CEL file format version number. */ int FusionCELData::GetVersion() { CheckAdapter(); return adapter->GetVersion(); } /* * Retrieve number of columns in array. */ int FusionCELData::GetCols() { CheckAdapter(); return adapter->GetCols(); } /* * Retrieve number of rows in array. */ int FusionCELData::GetRows() { CheckAdapter(); return adapter->GetRows(); } /* * Retrieve number of cells in array. */ int FusionCELData::GetNumCells() { CheckAdapter(); return adapter->GetNumCells(); } /* * Retrieve header in a single string */ std::wstring FusionCELData::GetHeader() { CheckAdapter(); return adapter->GetHeader(); } /* * Retrieve algorithm name. 
*/ std::wstring FusionCELData::GetAlg() { CheckAdapter(); return adapter->GetAlg(); } /* * Retrieve algorithm version. */ std::wstring FusionCELData::GetAlgVer() { CheckAdapter(); return adapter->GetAlgVer(); } /* * Retrieve algorithm parameters. */ std::wstring FusionCELData::GetParams() { CheckAdapter(); return adapter->GetParams(); } /* * Retrieve algorithm parameter of specified tag */ std::wstring FusionCELData::GetAlgorithmParameter(const wchar_t *tag) { CheckAdapter(); return adapter->GetAlgorithmParameter(tag); } /* * Retrieves the algorithm parameter name (tag) for a given index position. */ std::wstring FusionCELData::GetAlgorithmParameterTag(int index) { CheckAdapter(); return adapter->GetAlgorithmParameterTag(index); } /* * Retrieves the number of algorithm parameters. */ int FusionCELData::GetNumberAlgorithmParameters() { CheckAdapter(); return adapter->GetNumberAlgorithmParameters(); } /* * Retrieve algorithm parameters */ std::wstring FusionCELData::GetAlgorithmParameters() { CheckAdapter(); return adapter->GetAlgorithmParameters(); } /* * Retreive the algorithm parameters in a list */ FusionTagValuePairTypeList &FusionCELData::GetParameters() { CheckAdapter(); FillParameterList(); return parameterList; } /* * Get the DatHeader string. */ std::wstring FusionCELData::GetDatHeader() { CheckAdapter(); return adapter->GetDatHeader(); } /* * Retrieve chip type. */ std::wstring FusionCELData::GetChipType() { CheckAdapter(); return adapter->GetChipType(); } /* * Get the library package */ std::wstring FusionCELData::GetLibraryPackageName() { CheckAdapter(); return adapter->GetLibraryPackageName(); } /* * Get the master file */ std::wstring FusionCELData::GetMasterFileName() { CheckAdapter(); return adapter->GetMasterFileName(); } /* * Retrieve cell margin. */ int FusionCELData::GetCellMargin() { CheckAdapter(); return adapter->GetCellMargin(); } /* * Retrieve number of outliers. 
*/ unsigned int FusionCELData::GetNumOutliers() { CheckAdapter(); return adapter->GetNumOutliers(); } /* * Retrieve number of masked cells. */ unsigned int FusionCELData::GetNumMasked() { CheckAdapter(); return adapter->GetNumMasked(); } /* * Get the grid coordinates. */ affymetrix_fusion_io::FGridCoords FusionCELData::GetGridCorners() { CheckAdapter(); return adapter->GetGridCorners(); } // Index/position conversions /* * Get x coordinates from index. */ int FusionCELData::IndexToX(int index) { CheckAdapter(); return adapter->IndexToX(index); } /* * Get y coordinates from index. */ int FusionCELData::IndexToY(int index) { CheckAdapter(); return adapter->IndexToY(index); } /* * Convert x, y coordinates to index. */ int FusionCELData::XYToIndex(int x, int y) { CheckAdapter(); return adapter->XYToIndex(x, y); } // Accessors for intensity information. /* * Retrieve a CEL file entry. */ void FusionCELData::GetEntry(int index, FusionCELFileEntryType &entry) { CheckAdapter(); return adapter->GetEntry(index, entry); } /* * Retrieve a CEL file entry. */ void FusionCELData::GetEntry(int x, int y, FusionCELFileEntryType &entry) { CheckAdapter(); return adapter->GetEntry(x, y, entry); } /* * Retrieve a CEL file intensity. */ float FusionCELData::GetIntensity(int index) { CheckAdapter(); return adapter->GetIntensity(index); } /* * Retrieve vector of CEL file intensities. */ int FusionCELData::GetIntensities(int index,std::vector& intensities) { CheckAdapter(); return adapter->GetIntensities(index,intensities); } /* * Retrieve a CEL file intensity. */ float FusionCELData::GetIntensity(int x, int y) { CheckAdapter(); return adapter->GetIntensity(x, y); } /* * Retrieve a CEL file stdv value. */ float FusionCELData::GetStdv(int index) { CheckAdapter(); return adapter->GetStdv(index); } /* * Retrieve a CEL file stdv value. */ float FusionCELData::GetStdv(int x, int y) { CheckAdapter(); return adapter->GetStdv(x, y); } /* * Retrieve a CEL file pixel count. 
*/ short FusionCELData::GetPixels(int index) { CheckAdapter(); return adapter->GetPixels(index); } /* * Retrieve a CEL file pixel count. */ short FusionCELData::GetPixels(int x, int y) { CheckAdapter(); return adapter->GetPixels(x, y); } // Accessors for the mask/outlier flags /* * Retrieve a CEL file mask flag. */ bool FusionCELData::IsMasked(int x, int y) { CheckAdapter(); return adapter->IsMasked(x, y); } /* * Retrieve a CEL file mask flag. */ bool FusionCELData::IsMasked(int index) { CheckAdapter(); return adapter->IsMasked(index); } /* * Retrieve a CEL file outlier flag. */ bool FusionCELData::IsOutlier(int x, int y) { CheckAdapter(); return adapter->IsOutlier(x, y); } /* * Retrieve a CEL file outlier flag. */ bool FusionCELData::IsOutlier(int index) { CheckAdapter(); return adapter->IsOutlier(index); } // For reading a file. /* * Closes the file. */ void FusionCELData::Close() { if (adapter != NULL) adapter->Close(); } /* * Returns the file size. */ unsigned int FusionCELData::GetFileSize() { return Fs::fileSize(filename); } /* * Check if the file exists. */ bool FusionCELData::Exists() { assert(filename != ""); return FileUtils::Exists(filename.c_str()); } /* * Reads the header of the CEL file. */ bool FusionCELData::ReadHeader() { if (Exists() == false) return false; try { CreateAdapter(); return adapter->ReadHeader(); } catch (...) { return false; } } /* * Reads the CEL file. */ bool FusionCELData::Read(bool bIncludeMaskAndOutliers) { if (Exists() == false) return false; try { CreateAdapter(); return adapter->Read(bIncludeMaskAndOutliers); } catch (...) { return false; } } /* * Determine CEL file format and call appropriate function to read file using the specified file name. */ bool FusionCELData::ReadEx(const char *filename_, int state) { filename = filename_; if (Exists() == false) return false; try { CreateAdapter(); return adapter->ReadEx(filename_, state); } catch (...) { return false; } } /* * Returns the reading state. 
*/ int FusionCELData::GetReadState() { CheckAdapter(); return adapter->GetReadState(); } /* * Clears the members. */ void FusionCELData::Clear() { if (adapter) adapter->Clear(); parameterListRead = false; parameterList.clear(); } /* */ void FusionCELData::CreateAdapter() { DeleteAdapter(); // Create a Calvin adapter IFusionCELDataAdapter* calvinAdapter = new CalvinCELDataAdapter; if (calvinAdapter) { calvinAdapter->SetFileName(filename); if (calvinAdapter->CanReadFile()) { adapter = calvinAdapter; } else { delete calvinAdapter; IFusionCELDataAdapter* gcosAdapter = new GCOSCELDataAdapter; if (gcosAdapter) { gcosAdapter->SetFileName(filename); if (gcosAdapter->CanReadFile()) { adapter = gcosAdapter; } else { UnableToOpenFileException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } else { UnableToOpenFileException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } } } /* */ void FusionCELData::DeleteAdapter() { if (adapter) delete adapter; adapter = 0; } /* */ void FusionCELData::CheckAdapter() const { if (adapter == 0) { FileNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* */ void FusionCELData::FillParameterList() { if (parameterListRead == false) { adapter->GetParameters(parameterList); parameterListRead = true; } } /* Sets the active data group for a multi-group CEL file. Default is the first group. */ void FusionCELData::SetActiveDataGroup(const std::wstring &groupName) { CheckAdapter(); adapter->SetActiveDataGroup(groupName); } /* Is this a multi-color CEL file? 
*/ bool FusionCELData::IsMultiColor() { CheckAdapter(); return adapter->IsMultiColor(); } /* Returns a list of the channel (i.e. data group) names */ WStringVector FusionCELData::GetChannels() { CheckAdapter(); return adapter->GetChannels(); } affxparser/src/fusion/calvin_files/fusion/src/FusionCELData.h0000644000175200017520000002461514516003651025322 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionCELData_HEADER_ #define _AffymetrixFusionCELData_HEADER_ /*! \file FusionCELData.h This file defines the Fusion CEL Data classes */ #include "calvin_files/fusion/src/FusionCELDataAdapterInterface.h" // namespace affymetrix_fusion_io { //////////////////////////////////////////////////////////////////// /*! This defines the combined GCOS/Calvin CEL data interaction class. */ class FusionCELData { public: /*! CEL file reading states */ enum { CEL_ALL=1, CEL_DATA=2, CEL_OUTLIER=4, CEL_MASK=8 }; public: /*! Constructor */ FusionCELData(); /*! Destructor */ ~FusionCELData(); public: /*! Set the file name. * @param value The file name. Can have the full path or relative path */ void SetFileName(const char *value); /*! Get the file name. 
* @return File name */ std::string GetFileName() const; /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType GetFileId() const; /*! Set the error string. * @param value The error. */ void SetError(const wchar_t* value); /*! Retrieve error string * @return Error string */ std::wstring GetError(); /*! Accessors for header information. */ std::wstring GetHeaderKey(const wchar_t* key); /*! Retrieve CEL file format version number. * @return CEL file format version number */ int GetVersion(); /*! Retrieve number of columns in array * @return Number of columns in array */ int GetCols(); /*! Retrieve number of rows in array * @return Number of rows in array */ int GetRows(); /*! Retrieve number of cells in array * @return Number of cells in array */ int GetNumCells(); /*! Retrieve header in a single string * @return Header string */ std::wstring GetHeader(); /*! Retrieve algorithm name * @return Algorithm name */ std::wstring GetAlg(); /*! Retrieve algorithm version * @return Algorithm version */ std::wstring GetAlgVer(); /*! Retrieve algorithm parameters * @return Algorithm parameters */ std::wstring GetParams(); /*! Retrieve algorithm parameter of specified tag * @param tag Algorithm parameter tag * @return Algorithm parameter value */ std::wstring GetAlgorithmParameter(const wchar_t *tag); /*! Retrieves the algorithm parameter name (tag) for a given index position. * @param index The zero based index to the parameter array (0 to the number of alg parameters - 1). * @return The parameter name (tag). */ std::wstring GetAlgorithmParameterTag(int index); /*! Retrieves the number of algorithm parameters. * @return The number of algorithm parameters. */ int GetNumberAlgorithmParameters(); /*! Retrieve algorithm parameters * @return Algorithm parameters */ std::wstring GetAlgorithmParameters(); /*! Retreive the algorithm parameters in a list. 
* @return An STL list of algorithm parameters */ FusionTagValuePairTypeList &GetParameters(); /*! Get the DatHeader string. * @return The DatHeader string. */ std::wstring GetDatHeader(); /*! Retrieve chip type * @return Chip type */ std::wstring GetChipType(); /*! Get the library package * @return library package name (blank for GCOS format CEL files) */ std::wstring GetLibraryPackageName(); /*! Get the master file * @return master file name (blank for GCOS format CEL files) */ std::wstring GetMasterFileName(); /*! Retrieve cell margin * @return Cell margin */ int GetCellMargin(); /*! Retrieve number of outliers * @return Number of outliers */ unsigned int GetNumOutliers(); /*! Retrieve number of masked cells * @return Number of masked cells */ unsigned int GetNumMasked(); /*! Get the grid coordinates. * @return Returns the grid coordinates. */ FGridCoords GetGridCorners(); // Index/position conversions /*! Get x coordinates from index * @return X coordinates */ int IndexToX(int index); /*! Get y coordinates from index * @return Y coordinates */ int IndexToY(int index); /*! Convert x, y coordinates to index * @return Index */ int XYToIndex(int x, int y); /*! Maps X/Y coordinates to CEL file index. * @param x The x coordinate. * @param y The y coordinate. * @param r The number of rows. * @param c The number of columns. * @return The index to the intensity arrays. */ static int XYToIndex(int x, int y, int r, int c); // Accessors for intensity information. /*! Retrieve a CEL file entry. * @param index The index to the CEL file entries. * @param entry The CEL file entry. */ void GetEntry(int index, FusionCELFileEntryType &entry); /*! Retrieve a CEL file entry. * @param x The X coordinate. * @param y The Y coordinate. * @param entry The CEL file entry. */ void GetEntry(int x, int y, FusionCELFileEntryType &entry); /*! Retrieve a CEL file intensity. * @param index The index to the CEL file entries. * @return The CEL file intensity. 
*/ float GetIntensity(int index); /// @brief Get a vector of intensities. /// @param index The index of where to start. /// @param intensity_vec The vector to fill, its size is the number of intensities. int GetIntensities(int index,std::vector& intensity_vec); /*! Retrieve a CEL file intensity. * @param x The X coordinate. * @param y The Y coordinate. * @return The CEL file intensity. */ float GetIntensity(int x, int y); /*! Retrieve a CEL file stdv value. * @param index The index to the CEL file entries. * @return The CEL file stdv value. */ float GetStdv(int index); /*! Retrieve a CEL file stdv value. * @param x The X coordinate. * @param y The Y coordinate. * @return The CEL file stdv value. */ float GetStdv(int x, int y); /*! Retrieve a CEL file pixel count. * @param index The index to the CEL file entries. * @return The CEL file pixel count. */ short GetPixels(int index); /*! Retrieve a CEL file pixel count. * @param x The X coordinate. * @param y The Y coordinate. * @return The CEL file pixel count. */ short GetPixels(int x, int y); // Accessors for the mask/outlier flags /*! Retrieve a CEL file mask flag. * @param x The X coordinate. * @param y The Y coordinate. * @return True if the feature is masked. */ bool IsMasked(int x, int y); /*! Retrieve a CEL file mask flag. * @param index The index to the CEL file entries. * @return True if the feature is masked. */ bool IsMasked(int index); /*! Retrieve a CEL file outlier flag. * @param x The X coordinate. * @param y The Y coordinate. * @return True if the feature is an outlier. */ bool IsOutlier(int x, int y); /*! Retrieve a CEL file outlier flag. * @param index The index to the CEL file entries. * @return True if the feature is an outlier. */ bool IsOutlier(int index); // For reading a file. /*! Closes the file */ void Close(); /*! Returns the file size. * @return The file size. */ unsigned int GetFileSize(); /*! Check if the file exists. * @return True if the file exists. */ bool Exists(); /*! 
Reads the header of the CEL file. * @return True if successful. */ bool ReadHeader(); /*! Reads the CEL file. * @param bIncludeMaskAndOutliers Flag to indicate if the mask and outlier sections should also be read. * @return True if successful. */ bool Read(bool bIncludeMaskAndOutliers=true); /*! Determine CEL file format and call appropriate function to read file using the specified file name. * @param filename The name of the file. * @param state Reading state * @return bool true if success, false if fail * * \a state can be one or combination of the following values: * CEL_ALL Read all information in file (default) * CEL_DATA Read header and intensities only * CEL_OUTLIER Read header, intensities and outliers * CEL_MASK Read header, intensities and masked cells */ bool ReadEx(const char *filename=0, int state=CEL_ALL); /*! Returns the reading state. * @return The reading state. */ int GetReadState(); // For writing a new CEL file /*! Clears the members. */ void Clear(); /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData(); /*! Returns the list of parameters associated with a data set, empty for GCOS files * @param setName The data set name * @return The list of parameters */ affymetrix_calvin_parameter::ParameterNameValueTypeList GetDataSetParameters(const std::wstring &setName); /*! Sets the active data group for a multi-group CEL file. Default is the first group. */ void SetActiveDataGroup(const std::wstring &groupName); /*! Is this a multi-color CEL file? * @return True if it is multi-color */ bool IsMultiColor(); /*! Returns a list of the channel (ie data group) names * @return list of channel names */ WStringVector GetChannels(); protected: /*! Factory method to create the appropriate adapter class. * @exception UnableToOpenFileException */ void CreateAdapter(); /*! Delete the adapter member */ void DeleteAdapter(); /*! 
Check the adapter and throw exception if not set * @exception FileNotOpenException */ void CheckAdapter() const; /*! */ void FillParameterList(); private: /*! Pointer to the adapter */ IFusionCELDataAdapter* adapter; /*! CEL file name including a relative path or the full path. */ std::string filename; /*! A separate parallel parameter list that is sync'ed with the list in the adapter */ FusionTagValuePairTypeList parameterList; /*! Indicates whether the parameter list has been read from the adapter */ bool parameterListRead; }; } #endif //_AffymetrixFusionCELData_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionCELDataAdapterInterface.h0000644000175200017520000002477114516003651030447 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionCELDataInterface_HEADER_ #define _AffymetrixFusionCELDataInterface_HEADER_ #include "calvin_files/data/src/GenericData.h" #include "calvin_files/fusion/src/FusionCoords.h" #include "calvin_files/fusion/src/FusionTagValuePairType.h" #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // /*! 
\file FusionCELDataAdapterInterface.h This file defines the interface between * the FusionCELData class and the Calvin and GCOS adapter classes. */ namespace affymetrix_fusion_io { /*! Fusion Entry Type */ typedef struct _FusionCELFileEntryType { /*! Intensity value */ float Intensity; /*! standard deviation value */ float Stdv; /*! pixels */ short Pixels; } FusionCELFileEntryType; /*! \brief Adapter that wraps both GCOS and Calvin cell file readers and contains the interfaces to the data.*/ class IFusionCELDataAdapter { public: /*! \brief Virtual destructor. */ virtual ~IFusionCELDataAdapter() {}; /*! \brief Can this object read the file * \return If the cell file can be read. */ virtual bool CanReadFile() = 0; /*! \brief Set the cell file name. * \param value The cell file name to be set */ virtual void SetFileName(const std::string& value) = 0; /*! \brief Get the cell file name. * \return The currently set cell file name. */ virtual std::string GetFileName() const = 0; /*! \brief Get the id of the file (only valid for Command Console "calvin" files) * \return The unique file id. */ virtual affymetrix_calvin_utilities::AffymetrixGuidType GetFileId() = 0; /*! \brief Set the error message. * \param value The error message to be set. */ virtual void SetError(const wchar_t* value) = 0; /*! \brief Get the currently set error message string. * \return The error message. */ virtual std::wstring GetError() = 0; /*! \brief Get the header key. * \param key Name of header value. * \return The header key. */ virtual std::wstring GetHeaderKey(const wchar_t* key) = 0; /*! \brief Get the version of the cell file. * \return The cell file version. */ virtual int GetVersion() = 0; /*! \brief Get the number of columns. * \return The number of columns */ virtual int GetCols() = 0; /*! \brief Get the number of rows. * \return The number of rows. */ virtual int GetRows() = 0; /*! \brief Get number of cells * \return The number of cells */ virtual int GetNumCells() = 0; /*! 
\brief Get the header string.
	 * \return The header as a string.
	 */
	virtual std::wstring GetHeader() = 0;

	/*! \brief Get the algorithm name.
	 * \return The algorithm name.
	 */
	virtual std::wstring GetAlg() = 0;

	/*! \brief Get the algorithm version.
	 * \return The algorithm version.
	 */
	virtual std::wstring GetAlgVer() = 0;

	/*! \brief Get the parameters.
	 * \return The parameters used for creating the cell file.
	 */
	virtual std::wstring GetParams() = 0;

	/*! \brief Get the value of a single algorithm parameter.
	 * \param tag Parameter name associated with a parameter value.
	 * \return The parameter value.
	 */
	virtual std::wstring GetAlgorithmParameter(const wchar_t *tag) = 0;

	/*! \brief Get the name (tag) of the algorithm parameter at a given position.
	 * \param index The index to the parameter array.
	 * \return The parameter name (tag).
	 */
	virtual std::wstring GetAlgorithmParameterTag(int index) = 0;

	/*! \brief Get the number of algorithm parameters.
	 * \return The number of algorithm parameters.
	 */
	virtual int GetNumberAlgorithmParameters() = 0;

	/*! \brief Get the algorithm parameters as a single string.
	 * \return The algorithm parameters.
	 */
	virtual std::wstring GetAlgorithmParameters() = 0;

	/*! \brief Get parameters.
	 * \param values Collection of name/value type parameter list (filled by the call).
	 */
	virtual void GetParameters(FusionTagValuePairTypeList& values) = 0;

	/*! \brief Get the DatHeader string.
	 * \return The DatHeader string.
	 */
	virtual std::wstring GetDatHeader() = 0;

	/*! \brief Get chip type.
	 * \return The chip type of the cell.
	 */
	virtual std::wstring GetChipType() = 0;

	/*! \brief Get the master file
	 * \return master file name
	 */
	virtual std::wstring GetMasterFileName() = 0;

	/*! \brief Get the library package
	 * \return library package name
	 */
	virtual std::wstring GetLibraryPackageName() = 0;

	/*! \brief Get cell margin.
	 * \return The cell margin.
	 */
	virtual int GetCellMargin() = 0;

	/*! \brief Get number of outliers.
	 * \return The number of outliers.
	 */
	virtual unsigned int GetNumOutliers() = 0;

	/*! \brief Get number of masked cells.
	 * \return The number of masked cells.
*/ virtual unsigned int GetNumMasked() = 0; /*! \brief Get the grid coordinates. * \return Returns the grid coordinates. */ virtual FGridCoords GetGridCorners() = 0; // Index/position conversions /*! \brief Translate index to X. * \param index The index to translate for x. * \return The translated index to x value. */ virtual int IndexToX(int index) = 0; /*! \brief Translate index to Y. * \param index The index to translate for y. * \return The translated index to y value. */ virtual int IndexToY(int index) = 0; /*! \brief Translate X and Y to an index. * \param x X coordinate. * \param y Y coordinate. * \return The translated index from x and y. */ virtual int XYToIndex(int x, int y) = 0; // Accessors for intensity information. /*! \brief Get entry by index. * \param index Entry index. * \param entry Entry to be filled from index. */ virtual void GetEntry(int index, FusionCELFileEntryType &entry) = 0; /*! \brief Get entry by x and y. * \param x X position. * \param y Y position. * \param entry Entry to be filled from x and y. */ virtual void GetEntry(int x, int y, FusionCELFileEntryType &entry) = 0; /*! \brief Get intensity by index position. * \param index Location of intensity * \return The intensity value. */ virtual float GetIntensity(int index) = 0; /*! @brief Get a vector of intensities * @param index index of the starting intensity. * @param intensities vector to fill, its size is the count. * @return non-zero on error. */ virtual int GetIntensities(int index, std::vector& intensities) = 0; /*! \brief Get intensity by x, y position. * \param x X position. * \param y Y position. * \return The intensity value. */ virtual float GetIntensity(int x, int y) = 0; /*! \brief Get standard deviation by index position. * \param index Location of stdv. * \return The standard deviation value. */ virtual float GetStdv(int index) = 0; /*! \brief Get standard deviation by x, y position. * \param x X position. * \param y Y position. * \return The standard deviation value. 
*/
	virtual float GetStdv(int x, int y) = 0;

	/*! \brief Get pixel count by index position.
	 * \param index Location of pixel.
	 * \return The pixel count.
	 */
	virtual short GetPixels(int index) = 0;

	/*! \brief Get pixel count by x, y position.
	 * \param x X position.
	 * \param y Y position.
	 * \return The pixel count.
	 */
	virtual short GetPixels(int x, int y) = 0;

	// Accessors for the mask/outlier flags

	/*! \brief Check if masked by x, y position.
	 * \param x X position.
	 * \param y Y position.
	 * \return Is the x, y position masked.
	 */
	virtual bool IsMasked(int x, int y) = 0;

	/*! \brief Check if masked by index position.
	 * \param index Location to check.
	 * \return Is index position masked.
	 */
	virtual bool IsMasked(int index) = 0;

	/*! \brief Check if outlier by x, y position.
	 * \param x X position.
	 * \param y Y position.
	 * \return Is the x, y position an outlier.
	 */
	virtual bool IsOutlier(int x, int y) = 0;

	/*! \brief Check if outlier by index position.
	 * \param index Location to check.
	 * \return Is index position an outlier.
	 */
	virtual bool IsOutlier(int index) = 0;

	// For reading a file.

	/*! \brief Close the cell file. */
	virtual void Close() = 0;

	/*! \brief Read the header of the cell file.
	 * \return If the header was read successfully.
	 */
	virtual bool ReadHeader() = 0;

	/*! \brief Read the cell file.
	 * \param bIncludeMaskAndOutliers Flag indicates whether to include in the read, the reading of outliers and masked items.
	 * \return If the read completed successfully.
	 */
	virtual bool Read(bool bIncludeMaskAndOutliers) = 0;

	/*! \brief Read the cell file with the given name.
	 *
	 *	The state flag is used for GCOS files only.
	 *
	 * \param filename Cell file name to read.
	 * \param state [=CEL_ALL] Reading state
	 * \return If the read completed successfully.
	 * \a state can be one or combination of the following values:\n\n
	 *          CEL_ALL      Read all information in file (default)\n
	 *          CEL_DATA     Read header and intensities only\n
	 *			CEL_OUTLIER  Read header, intensities and outliers\n
	 *			CEL_MASK     Read header, intensities and masked cells\n\n
	 */
	virtual bool ReadEx(const char *filename, int state) = 0;

	/*!
\brief Get the reading state * \return The reading state. */ virtual int GetReadState() = 0; /*! \brief clears the members. */ virtual void Clear() = 0; /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ virtual affymetrix_calvin_io::GenericData *GetGenericData() = 0; /*! Returns the list of parameters associated with a data set, empty for GCOS files * @param setName The data set name * @return The list of parameters */ virtual affymetrix_calvin_parameter::ParameterNameValueTypeList GetDataSetParameters(const std::wstring &setName) = 0; /*! Sets the active data group for a multi-group CEL file. Default is the first group. */ virtual void SetActiveDataGroup(const std::wstring &groupName) = 0; /*! Is this a multi-color CEL file? * @return True if it is multi-color */ virtual bool IsMultiColor() = 0; /*! Returns a list of the channel (ie data group) names * @return list of channel names */ virtual WStringVector GetChannels() = 0; }; } #endif //_AffymetrixFusionCELDataInterface_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionCHPData.cpp0000644000175200017520000000770214516003651025662 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPData.h" // #include "calvin_files/parsers/src/GenericFileReader.h" #include "calvin_files/utils/src/FileUtils.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_io; /*! The head of the CHP data objects. */ FusionCHPDataReg *FusionCHPDataReg::m_Head = NULL; /* * Store the ids */ void FusionCHPDataReg::SetFileTypeIds(const AffymetrixGuidTypeList &fileTypeIds) { fileTypeIdentifiers = fileTypeIds; } /* * Set the members. The head is the pointer to this class. */ FusionCHPDataReg::FusionCHPDataReg() : m_Next(m_Head) { m_Head = this; } FusionCHPDataReg::~FusionCHPDataReg() { fileTypeIdentifiers.clear(); } /* * Read the header and return the file type id. If the file fails to read * it must be an old CHP file, just return a blank for the file type id. */ FusionCHPData *FusionCHPDataReg::CreateObject(const AffymetrixGuidType &id) { // Find the matching CHP data object. for (FusionCHPDataReg *p=m_Head; p != NULL; p = p->m_Next) { AffymetrixGuidTypeList::iterator it; for (it=p->fileTypeIdentifiers.begin(); it!=p->fileTypeIdentifiers.end(); ++it) { if (*it == id) { FusionCHPData *chp = p->MakeObject(); chp->fileTypeIdentifiers = p->fileTypeIdentifiers; chp->fileTypeIdentifier = id; if (chp) { return chp; } } } } // Go back now and find the generic reader (the one with no file type identifiers) for (FusionCHPDataReg *p=m_Head; p != NULL; p = p->m_Next) { if (p->fileTypeIdentifiers.size() == 0) { FusionCHPData *chp = p->MakeObject(); chp->fileTypeIdentifier = id; return chp; } } return NULL; } /* * Read the guid from the file. 
*/ bool ReadGuidFromFile(const std::string &fileName, AffymetrixGuidType &guid) { if (FileUtils::Exists(fileName.c_str()) == false) return false; GenericData data; GenericFileReader reader; try { reader.SetFilename(fileName); reader.ReadHeader(data); guid = data.Header().GetGenericDataHdr()->GetFileTypeId(); return true; } catch (...) { guid = ""; return true; } } /* * Read the file type ID from the header of the file. * Now find the data object with the matching id. * Read the file and return the pointer. */ FusionCHPData *FusionCHPDataReg::ReadHeader(const std::string &fileName) { AffymetrixGuidType fileTypeId; if (ReadGuidFromFile(fileName, fileTypeId) == false) return NULL; FusionCHPData *chp = FusionCHPDataReg::CreateObject(fileTypeId); if (chp) { chp->SetFileName(fileName.c_str()); if (chp->ReadHeader() == false) { delete chp; chp = NULL; } } return chp; } /* * Read the file type ID from the header of the file. * Now find the data object with the matching id. * Read the file and return the pointer. */ FusionCHPData *FusionCHPDataReg::Read(const std::string &fileName) { AffymetrixGuidType fileTypeId; if (ReadGuidFromFile(fileName, fileTypeId) == false) return NULL; FusionCHPData *chp = CreateObject(fileTypeId); if (chp) { chp->SetFileName(fileName.c_str()); if (chp->Read() == false) { delete chp; chp = NULL; } } return chp; } affxparser/src/fusion/calvin_files/fusion/src/FusionCHPData.h0000644000175200017520000001133314516003651025322 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #if !defined _AffymetrixFusionCHPData_HEADER_ #define _AffymetrixFusionCHPData_HEADER_ /*! \file FusionCHPData.h This file provides CHP file reading capabilities.*/ #include "calvin_files/data/src/GenericData.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include #include #include // namespace affymetrix_fusion_io { /*! A base class for all CHP data objects. */ class FusionCHPData { protected: /*! Sets the file name. * @param str The file name. */ void SetFileName(const char *str) { filename = str; } /*! Gets the file name. * @return The file name. */ std::string GetFileName() const { return filename; } /*! The CHP file name. */ std::string filename; /*! Reads the contents of the file. * @return True if successfully read. */ virtual bool Read() = 0; /*! Reads the header. * @return True if successfully read. */ virtual bool ReadHeader() = 0; /*! Friend to the registration class as it will call the Read function. */ friend class FusionCHPDataReg; /*! The file type identifiers associated with the CHP files the reader can parse. */ affymetrix_calvin_utilities::AffymetrixGuidTypeList fileTypeIdentifiers; /*! The actual file type identifier in the file. */ affymetrix_calvin_utilities::AffymetrixGuidType fileTypeIdentifier; public: /*! Destructor */ virtual ~FusionCHPData() {} /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ virtual affymetrix_calvin_utilities::AffymetrixGuidType FileId() = 0; /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. 
 */
	virtual affymetrix_calvin_io::GenericData *GetGenericData() = 0;

	/*! Gets the class name.
	 * @return The name identifying the concrete CHP data class.
	 */
	virtual affymetrix_calvin_utilities::AffymetrixGuidType GetObjectName() = 0;

	/*! Gets the file type identifiers associated with the CHP files the reader can parse.
	 * @return The ids (returned by value).
	 */
	affymetrix_calvin_utilities::AffymetrixGuidTypeList FileTypeIdentifiers() const { return fileTypeIdentifiers; }

	/*! Gets the file type identifier in the file (blank for GCOS files).
	 * @return The id of the file.
	 */
	affymetrix_calvin_utilities::AffymetrixGuidType FileTypeIdentifier() { return fileTypeIdentifier; }
};

/*! A class used to self register CHP data classes.
 * Registrations are chained through m_Head / m_Next, and the static
 * Read / ReadHeader functions walk that chain to create a reader object.
 */
class FusionCHPDataReg
{
public:
	/*! Constructor */
	FusionCHPDataReg();

	/*! Destructor */
	virtual ~FusionCHPDataReg();

	/*! Sets the file type ids.
	 * @param fileTypeIds The identifiers that the CHP object is compatible with.
	 */
	void SetFileTypeIds(const affymetrix_calvin_utilities::AffymetrixGuidTypeList &fileTypeIds);

	/*! Reads the contents of a CHP file.
	 * @param fileName The full path to the input CHP file.
	 * @return A pointer to the CHP data object. NULL if the read failed.
	 */
	static FusionCHPData *Read(const std::string &fileName);

	/*! Reads the header of a CHP file.
	 * @param fileName The full path to the input CHP file.
	 * @return A pointer to the CHP data object. NULL if the read failed.
	 */
	static FusionCHPData *ReadHeader(const std::string &fileName);

private:
	/*! Creates a CHP reading object.
	 * @param fileTypeId The file type in the CHP file.
	 * @return The CHP object, NULL if not able to read the file.
	 */
	static FusionCHPData *CreateObject(const affymetrix_calvin_utilities::AffymetrixGuidType &fileTypeId);

	/*! Makes a CHP data object.
	 * Implemented by each concrete registration class.
	 * @return The CHP data object.
	 */
	virtual FusionCHPData *MakeObject() = 0;

	/*! A pointer to the first registered CHP reader. */
	static FusionCHPDataReg *m_Head;

	/*! A pointer to the next registered CHP reader. */
	FusionCHPDataReg *m_Next;

	/*!
The file type identifiers associated with the CHP files the reader can parse. */ affymetrix_calvin_utilities::AffymetrixGuidTypeList fileTypeIdentifiers; }; } /////////////////////////////////////////////////////////////////////////////////////// #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCHPDataAdapterInterface.h0000644000175200017520000002140214516003651030442 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionCHPDataInterface_HEADER_ #define _AffymetrixFusionCHPDataInterface_HEADER_ /*! \file FusionCHPDataAdapterInterface.h This file defines the interface between * the FusionCHPData class and the Calvin and GCOS adapter classes. */ #include "calvin_files/data/src/CHPData.h" // path required to distinguish between Calvin and GCOS files. #include "calvin_files/data/src/GenericData.h" #include "calvin_files/fusion/src/FusionProbeSetResults.h" #include "calvin_files/fusion/src/FusionTagValuePairType.h" #include "calvin_files/utils/src/AffymetrixGuid.h" #include "calvin_files/utils/src/StringUtils.h" // #include "file/CHPFileData.h" // path required to distinguish between Calvin and GCOS files. 
// #include #include #include // namespace affymetrix_fusion_io { /*! \brief The types of assays. */ typedef enum _AssayType { FusionExpression, /*! Expression assay */ FusionGenotyping, /*! Genotyping assay */ FusionResequencing, /*! Resequencing assay */ FusionUniversal, /*! Tag (universal) assay */ FusionUnknown /*! Unknown assay type */ } AssayType; /*! \brief Converts a fusion tag/value list to a non-Fusion tag/value list * \param fromList The list of name/value parameters to convert. * \param toList The converted list of name/value parameters. */ void ConvertFusion(FusionTagValuePairTypeList& fromList, TagValuePairTypeList& toList); /*! \brief Converts a GCOS tag/value list to a Fusion tag/value list * \param fromList The list of name/value parameters to convert. * \param toList The converted list of name/value parameters. */ void ConvertGCOS(TagValuePairTypeList& fromList, FusionTagValuePairTypeList& toList); /*! \brief Header class for the Fusion CHP adapter */ class IFusionCHPHeaderAdapter { public: /*! \brief Virtual destructor. */ virtual ~IFusionCHPHeaderAdapter() { }; // Accessors for header information. /*! \brief Get numbers columns. * \return Number of columns. */ virtual int GetCols() const = 0; /*! \brief Get number of rows. * \return Number of rows. */ virtual int GetRows() const = 0; /*! \brief Get the number of probesets. * \return Number of probesets. */ virtual int GetNumProbeSets() const = 0; /*! \brief Get assay type. * \return Assay Type. * * \a Assay Types * FusionExpression - Expression assay * FusionGenotyping - Genotyping assay * FusionResequencing - Resequencing assay * FusionUniversal - Tag (universal) assay * FusionUnknown - Unknown assay type */ virtual AssayType GetAssayType() const = 0; /*! \brief Get chip type. * \return The chip type. */ virtual std::wstring GetChipType() const = 0; /*! \brief Get algorithm name. * \return Algorithm name. */ virtual std::wstring GetAlgName() const = 0; /*! \brief Get algorithm version. 
* \return Algorithm version number. */ virtual std::wstring GetAlgVersion() const = 0; /*! \brief Gets the algorithm parameters. * \param values Name/Value type list to be filled. */ virtual void GetAlgorithmParameters(affymetrix_fusion_io::FusionTagValuePairTypeList& values) = 0; /*! \brief Gets the algorithm parameter count. * \return Number of algorithm parameters. */ virtual u_int32_t GetAlgorithmParameterCount() = 0; /*! \brief Gets the summary parameter count. * \return Number of summary parameters. */ virtual u_int32_t GetSummaryParameterCount() = 0; /*! \brief Gets summary parameters. * \param values Name/Value type list to be filled. */ virtual void GetSummaryParameters(affymetrix_fusion_io::FusionTagValuePairTypeList& values) = 0; /*! \brief Gets the parent CEL file. * \return Parent CEL file name. */ virtual std::wstring GetParentCellFile() const = 0; /*! \brief Gets the prog ID. * \return Prog ID */ virtual std::wstring GetProgID() const = 0; /*! \brief Gets a specific algorithm parameter given a name/tag. * \return Specific algorithm parameter given a name/tag */ virtual std::wstring GetAlgorithmParameter(const wchar_t *tag) = 0; /*! \brief Gets a specific algorithm parameter given a name/tag. * \param tag Parameter name. * \return Specific algorithm parameter given a name/tag */ virtual std::wstring GetSummaryParameter(const wchar_t *tag) = 0; /*! \brief Gets the background zone information. * \param info Background zone information to be filled. */ virtual void GetBackgroundZoneInfo(affxchp::BackgroundZoneInfo& info) = 0; /*! \brief Gets the list of background zone positions and values. * \param zones List of background zone positions and values to be filled. */ virtual void GetBackgroundZones(affxchp::BackgroundZoneTypeList& zones) = 0; /*! \brief Gets the background value for a given center coordinate. * \param type Background value for a given center coordinate to be filled. * \param x X position of zone. * \param y Y position of zone. 
*/ virtual void GetBackgroundZone(affxchp::BackgroundZoneType& type, int x, int y) = 0; /*! \brief Gets the magic number. * \return Magic number. */ virtual int GetMagic() const = 0; /*! \brief Gets the version number. * \return Version number */ virtual int GetVersion() const = 0; }; //////////////////////////////////////////////////////////////////// /*! \brief Data class for the Fusion CHP adapter */ class IFusionCHPDataAdapter { public: /*! \brief Virtual destructor. */ virtual ~IFusionCHPDataAdapter() {}; /*! \brief Accessors to header. * \return Header object */ virtual IFusionCHPHeaderAdapter& GetHeader() = 0; /*! \brief Can this object read the file. * \return If the cell file can be read. */ virtual bool CanReadFile() = 0; /*! Get the probe set name (only valid for Command Console "calvin" files) * @param index The index to the result object of interest. * @return The probe set name. */ virtual std::string GetProbeSetName(int index) = 0; /*! \brief Returns the expression probe set result * \param index Index to the result object of interest. * \param result Expression result. * \return True if the expression result was found. */ virtual bool GetExpressionResults(int index, affymetrix_fusion_io::FusionExpressionProbeSetResults& result) = 0; /*! \brief Returns the genotyping probe set result * \param index Index to the result object of interest. * \param result Genotyping result. * \return True if the genotyping result was found. */ virtual bool GetGenotypingResults(int index, affymetrix_fusion_io::FusionGenotypeProbeSetResults& result) = 0; /*! \brief Returns the universal (tag array) probe set result * \param index Index to the result object of interest. * \param results Universal result. * \return True if the universal result was found. */ virtual bool GetUniversalResults(int index, affymetrix_fusion_io::FusionUniversalProbeSetResults& results) = 0; /*! \brief Gets resequencing results. * \param results Hold the resequencing results. 
* \return True if resequencing results were retrieved. */ virtual bool GetResequencingResults(affymetrix_fusion_io::FusionResequencingResults& results) = 0; /*! \brief Functions to read file. * \return True if the cell file was read. */ virtual bool Read() = 0; /*! \brief Reads the header of the CHP file. * \return True if successful */ virtual bool ReadHeader() = 0; /*! \brief Sets the file name. * \param value Full path to the CHP file */ virtual void SetFileName(const std::string& value) = 0; /*! \brief Gets the file name. * \return Full path to the CHP file. */ virtual std::string GetFileName() const = 0; /*! \brief Deallocates any memory used by the class object. */ virtual void Clear() = 0; /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ virtual affymetrix_calvin_utilities::AffymetrixGuidType FileId() = 0; /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ virtual affymetrix_calvin_io::GenericData *GetGenericData() = 0; }; } #endif //_AffymetrixFusionCHPDataInterface_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionCHPGenericData.cpp0000644000175200017520000000455014516003651027155 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPGenericData.h" // #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; /*! Used to register the CHP reader. */ FusionCHPGenericData::Reg FusionCHPGenericData::reg; /*! The class name. */ static AffymetrixGuidType ObjectName = "FusionCHPGenericData"; /* * Convert to the CHP pointer. */ FusionCHPGenericData * FusionCHPGenericData::FromBase(FusionCHPData *chip) { if (chip != NULL && chip->GetObjectName() == ObjectName) return (FusionCHPGenericData *)chip; return NULL; } AffymetrixGuidType FusionCHPGenericData::GetObjectName() { return ObjectName; } FusionCHPGenericData::FusionCHPGenericData() { } FusionCHPGenericData::~FusionCHPGenericData() { Clear(); } /* * Get the id of the file (only valid for Command Console "calvin" files) */ AffymetrixGuidType FusionCHPGenericData::FileId() { return genericData.FileIdentifier(); } /* * Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ GenericData *FusionCHPGenericData::GetGenericData() { return &genericData; } bool FusionCHPGenericData::Read() { GenericFileReader reader; try { reader.SetFilename(filename); reader.Open(genericData); return true; } catch(...) 
{ return false; } } bool FusionCHPGenericData::ReadHeader() { return Read(); } void FusionCHPGenericData::Clear() { genericData.Clear(); } affxparser/src/fusion/calvin_files/fusion/src/FusionCHPGenericData.h0000644000175200017520000000600714516003651026621 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionCHPGenericData_HEADER_ #define _FusionCHPGenericData_HEADER_ #include "calvin_files/data/src/GenericData.h" #include "calvin_files/fusion/src/FusionCHPData.h" // #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affymetrix_fusion_io { /*! This class provides storage and reading capabilities for CHP files */ class FusionCHPGenericData : public FusionCHPData { public: /*! Destructor */ ~FusionCHPGenericData(); /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId(); /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData(); /*! Gets the class name. 
*/ affymetrix_calvin_utilities::AffymetrixGuidType GetObjectName(); /*! Deallocates any memory used by the class object */ void Clear(); /*! Gets the generic data object. * @return The generic data object. */ affymetrix_calvin_io::GenericData &GetData() { return genericData; } private: /*! Constructor */ FusionCHPGenericData(); /*! The generic file data object. */ affymetrix_calvin_io::GenericData genericData; /*! Reads the CHP file. * @return True if successful. */ bool Read(); /*! Reads the header of the CHP file * @return True if successful */ bool ReadHeader(); /*! A class to register a generic CHP reader. */ class Reg : public FusionCHPDataReg { public: /*! Constructor - register the file type. */ Reg() : FusionCHPDataReg() { std::list ids; SetFileTypeIds(ids); } /*! Creates a generic CHP object. * @return The generic CHP object. */ FusionCHPData *MakeObject() { return new FusionCHPGenericData; } }; /*! The one and only registration object. This registers the class as a CHP reader. */ static Reg reg; public: /*! Converts the type to the generic CHP type. * @param chip The pointer to the CHP data object. * @return The generic CHP data type or NULL if not compatible. */ static FusionCHPGenericData * FromBase(FusionCHPData *chip); }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCHPLegacyData.cpp0000644000175200017520000002106714516003651027007 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPLegacyData.h" // #include "calvin_files/fusion/src/CalvinAdapter/CalvinCHPDataAdapter.h" #include "calvin_files/fusion/src/FusionTagValuePairType.h" #include "calvin_files/fusion/src/GCOSAdapter/GCOSCHPDataAdapter.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/utils/src/StringUtils.h" // #include #include #include // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; /*! Used to register the CHP reader. */ FusionCHPLegacyData::Reg FusionCHPLegacyData::reg; /*! The class name. */ static AffymetrixGuidType ObjectName = "FusionCHPLegacyData"; /*! Constructor - register the legacy file type. */ namespace affymetrix_fusion_io { FusionCHPLegacyData::Reg ::Reg() : FusionCHPDataReg() { std::list ids; ids.push_back(CHP_EXPRESSION_ASSAY_TYPE); ids.push_back(CHP_RESEQUENCING_ASSAY_TYPE); ids.push_back(CHP_GENOTYPING_ASSAY_TYPE); ids.push_back(CHP_UNIVERSAL_ASSAY_TYPE); ids.push_back(""); SetFileTypeIds(ids); } }; /* */ FusionCHPHeader::FusionCHPHeader() { adapter = 0; } /* */ FusionCHPHeader::~FusionCHPHeader() { } /* * Convert to the CHP pointer. */ FusionCHPLegacyData * FusionCHPLegacyData::FromBase(FusionCHPData *chip) { if (chip != NULL && chip->GetObjectName() == ObjectName) return (FusionCHPLegacyData *)chip; return NULL; } AffymetrixGuidType FusionCHPLegacyData::GetObjectName() { return ObjectName; } // Accessors for header information. 
/* */ int FusionCHPHeader::GetVersion() const { CheckAdapter(); return adapter->GetHeader().GetVersion(); } /* */ int FusionCHPHeader::GetCols() const { CheckAdapter(); return adapter->GetHeader().GetCols(); } /* */ int FusionCHPHeader::GetRows() const { CheckAdapter(); return adapter->GetHeader().GetRows(); } int FusionCHPHeader::GetNumProbeSets() const { CheckAdapter(); return adapter->GetHeader().GetNumProbeSets(); } AssayType FusionCHPHeader::GetAssayType() const { CheckAdapter(); return adapter->GetHeader().GetAssayType(); } std::wstring FusionCHPHeader::GetChipType() const { CheckAdapter(); return adapter->GetHeader().GetChipType(); } std::wstring FusionCHPHeader::GetAlgName() const { CheckAdapter(); return adapter->GetHeader().GetAlgName(); } std::wstring FusionCHPHeader::GetAlgVersion() const { CheckAdapter(); return adapter->GetHeader().GetAlgVersion(); } void FusionCHPHeader::AlgorithmParameters(FusionTagValuePairTypeList& valuesFusion) { CheckAdapter(); adapter->GetHeader().GetAlgorithmParameters(valuesFusion); } u_int32_t FusionCHPHeader::AlgorithmParameterCount() { CheckAdapter(); return adapter->GetHeader().GetAlgorithmParameterCount(); } u_int32_t FusionCHPHeader::SummaryParameterCount() { CheckAdapter(); return adapter->GetHeader().GetSummaryParameterCount(); } void FusionCHPHeader::SummaryParameters(FusionTagValuePairTypeList& p) { CheckAdapter(); adapter->GetHeader().GetSummaryParameters(p); } std::wstring FusionCHPHeader::GetParentCellFile() const { CheckAdapter(); return adapter->GetHeader().GetParentCellFile(); } std::wstring FusionCHPHeader::GetProgID() const { CheckAdapter(); return adapter->GetHeader().GetProgID(); } std::wstring FusionCHPHeader::GetAlgorithmParameter(const wchar_t* tag) { CheckAdapter(); return adapter->GetHeader().GetAlgorithmParameter(tag); } std::wstring FusionCHPHeader::GetSummaryParameter(const wchar_t* tag) { CheckAdapter(); return adapter->GetHeader().GetSummaryParameter(tag); } void 
FusionCHPHeader::GetBackgroundZoneInfo(BackgroundZoneInfo& info) { CheckAdapter(); adapter->GetHeader().GetBackgroundZoneInfo(info); } void FusionCHPHeader::GetBackgroundZones(BackgroundZoneTypeList& zones) { CheckAdapter(); adapter->GetHeader().GetBackgroundZones(zones); } void FusionCHPHeader::GetBackgroundZone(BackgroundZoneType& type, int x, int y) { CheckAdapter(); adapter->GetHeader().GetBackgroundZone(type,x,y); } int FusionCHPHeader::GetMagic() const { CheckAdapter(); return adapter->GetHeader().GetMagic(); } void FusionCHPHeader::Clear() { adapter = 0; } void FusionCHPHeader::CheckAdapter() const { if (adapter == 0) { FileNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } ///////////////////////////// //// Data FusionCHPHeader& FusionCHPLegacyData::GetHeader() { CheckAdapter(); return header; } void FusionCHPLegacyData::CheckAdapter() const { if (adapter == 0) { FileNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } FusionCHPLegacyData::FusionCHPLegacyData() { adapter = 0; } FusionCHPLegacyData::~FusionCHPLegacyData() { DeleteAdapter(); } AffymetrixGuidType FusionCHPLegacyData::FileId() { CheckAdapter(); return adapter->FileId(); } GenericData *FusionCHPLegacyData::GetGenericData() { CheckAdapter(); return adapter->GetGenericData(); } std::string FusionCHPLegacyData::GetProbeSetName(int index) { CheckAdapter(); return adapter->GetProbeSetName(index); } bool FusionCHPLegacyData::GetExpressionResults(int index, FusionExpressionProbeSetResults& result) { CheckAdapter(); return adapter->GetExpressionResults(index, result); } bool FusionCHPLegacyData::GetGenotypingResults(int index, FusionGenotypeProbeSetResults& result) { CheckAdapter(); return 
adapter->GetGenotypingResults(index, result); } bool FusionCHPLegacyData::GetUniversalResults(int index, FusionUniversalProbeSetResults& result) { CheckAdapter(); return adapter->GetUniversalResults(index, result); } bool FusionCHPLegacyData::GetReseqResults(FusionResequencingResults &results) { CheckAdapter(); return adapter->GetResequencingResults(results); } bool FusionCHPLegacyData::Read() { CreateAdapter(); adapter->SetFileName(filename); return adapter->Read(); } bool FusionCHPLegacyData::ReadHeader() { CreateAdapter(); adapter->SetFileName(filename); return adapter->ReadHeader(); } void FusionCHPLegacyData::Clear() { CheckAdapter(); adapter->Clear(); } void FusionCHPLegacyData::CreateAdapter() { if (adapter) return; // Create a Calvin adapter IFusionCHPDataAdapter* calvinAdapter = new CalvinCHPDataAdapter(); if (calvinAdapter) { calvinAdapter->SetFileName(filename); if (calvinAdapter->CanReadFile()) { adapter = calvinAdapter; header.Clear(); header.adapter = calvinAdapter; } else { delete calvinAdapter; IFusionCHPDataAdapter* gcosAdapter = new GCOSCHPDataAdapter(); if (gcosAdapter) { gcosAdapter->SetFileName(filename); if (gcosAdapter->CanReadFile()) { adapter = gcosAdapter; header.Clear(); header.adapter = gcosAdapter; } else { adapter = 0; UnableToOpenFileException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); if(gcosAdapter!= NULL) delete gcosAdapter; gcosAdapter = 0; throw e; } } else { adapter = 0; UnableToOpenFileException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); if(gcosAdapter!= NULL) delete gcosAdapter; gcosAdapter = 0; throw e; } } } } void FusionCHPLegacyData::DeleteAdapter() { if (adapter) delete adapter; adapter = 0; header.adapter = 0; } 
affxparser/src/fusion/calvin_files/fusion/src/FusionCHPLegacyData.h0000644000175200017520000002014014516003651026443 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #if !defined _AffymetrixFusionCHPLegacyData_HEADER_ #define _AffymetrixFusionCHPLegacyData_HEADER_ /*! \file FusionCHPLegacyData.h This file provides CHP file reading capabilities.*/ ////////////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPData.h" #include "calvin_files/fusion/src/FusionCHPDataAdapterInterface.h" // #include #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affymetrix_fusion_io { class IFusionCHPDataAdapter; ////////////////////////////////////////////////////////////////////// /*! This class provides storage for the CHP file header */ class FusionCHPHeader { public: /*! Constructor */ FusionCHPHeader(); /*! Destructor */ ~FusionCHPHeader(); /*! Gets the number of feature columns * @return The number of feature columns */ int GetCols() const; /*! Gets the number of feature rows * @return The number of feature rows */ int GetRows() const; /*! 
Gets the number of probe sets * @return The number of probe sets */ int GetNumProbeSets() const; /*! Gets the assay type * @return The assay type */ AssayType GetAssayType() const; /*! Gets the chip type * @return The chip type */ std::wstring GetChipType() const; /*! Gets the algorithm name * @return The algorithm name */ std::wstring GetAlgName() const; /*! Gets the algorithm version * @return The algorithm version */ std::wstring GetAlgVersion() const; /*! Gets the algorithm parameters * @param values The fusion tag value pair type list */ void AlgorithmParameters(FusionTagValuePairTypeList& values); /*! Gets the algorithm parameters count * @return The number of algorithm parameters. */ u_int32_t AlgorithmParameterCount(); /*! Gets the summary parameters * @return The summary parameters */ void SummaryParameters(FusionTagValuePairTypeList& p); /*! Gets the summary parameters count * @return The number of summary parameters. */ u_int32_t SummaryParameterCount(); /*! Gets the parent CEL file * @return The parent CEL file */ std::wstring GetParentCellFile() const; /*! Gets the prog ID * @return The prog ID */ std::wstring GetProgID() const; /*! Gets a specific algorithm parameter given a name/tag * @return The specific algorithm parameter given a name/tag */ std::wstring GetAlgorithmParameter(const wchar_t* tag); /*! Gets a specific summary parameter given a name/tag * @return The specific summary parameter given a name/tag */ std::wstring GetSummaryParameter(const wchar_t* tag); /*! Gets the background zone information * @return The background zone information */ void GetBackgroundZoneInfo(affxchp::BackgroundZoneInfo& info); /*! Gets the list of background zone positions and values * @return The list of background zone positions and values */ void GetBackgroundZones(affxchp::BackgroundZoneTypeList& zones); /*! 
Gets the background value for a given center coordinate * @return The background value for a given center coordinate */ void GetBackgroundZone(affxchp::BackgroundZoneType& type, int x, int y); /*! Gets the magic number * @return The magic number */ int GetMagic() const; /*! Gets the version number * @return The version number */ int GetVersion() const; /*! Check the adapter and throw exception if not set * @exception FileNotOpenException */ void CheckAdapter() const; /*! Clears the members. */ void Clear(); protected: /* Friend to the data object. */ friend class FusionCHPLegacyData; /*! The adapter interface. */ IFusionCHPDataAdapter* adapter; }; //////////////////////////////////////////////////////////////////// /*! This class provides storage and reading capabilities for CHP files */ class FusionCHPLegacyData : public FusionCHPData { private: /*! Constructor */ FusionCHPLegacyData(); public: /*! Destructor */ ~FusionCHPLegacyData(); /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId(); /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData(); /*! Gets the class name. */ affymetrix_calvin_utilities::AffymetrixGuidType GetObjectName(); /*! Get the probe set name (only valid for Command Console "calvin" files of expression or genotype assay) * @param index The index to the result object of interest. * @return The probe set name. */ std::string GetProbeSetName(int index); /*! Returns the expression probe set result * @param index The index to the result object of interest. * @param result The expression result. * @return True if the expression result was found. */ bool GetExpressionResults(int index, FusionExpressionProbeSetResults& result); /*! Returns the genotyping probe set result * @param index The index to the result object of interest. 
* @param result The genotyping result. * @return True if the genotyping result was found. */ bool GetGenotypingResults(int index, FusionGenotypeProbeSetResults& result); /*! Returns the universal (tag array) probe set result * @param index The index to the result object of interest. * @param result The universal result. * @return True if the universal result was found. */ bool GetUniversalResults(int index, FusionUniversalProbeSetResults& result); /*! Returns the resequencing results. * @param results The results. * @return True if the reseq result was found. */ bool GetReseqResults(FusionResequencingResults &results); /*! Deallocates any memory used by the class object */ void Clear(); /*! Gets the headre. */ FusionCHPHeader& GetHeader(); private: /*! Opens the file for reading. * @param bReadHeaderOnly Flag to indicate if the header is to be read only. * @return True if successful. */ bool Open(bool bReadHeaderOnly=false); /*! Check the adapter and throw exception if not set * @exception FileNotOpenException */ void CheckAdapter() const; /*! Create the adapter. */ void CreateAdapter(); /*! Delete the adapter. */ void DeleteAdapter(); /*! Reads the CHP file. * @return True if successful. */ bool Read(); /*! Reads the header of the CHP file * @return True if successful */ bool ReadHeader(); /*! The fusion CHP adapter. */ IFusionCHPDataAdapter* adapter; /*! The CHP header */ FusionCHPHeader header; /*! A class to register the legacy CHP reader. */ class Reg : public FusionCHPDataReg { public: /*! Constructor - register the legacy file type. */ Reg(); /*! Creates a legacy CHP object. * @return The legacy CHP object. */ FusionCHPData *MakeObject() { return new FusionCHPLegacyData; } }; /*! The one and only registration object. This registers the class as a CHP reader. */ static Reg reg; public: /*! Converts the type to the legacy CHP type. * @param chip The pointer to the CHP data object. * @return The legacy CHP data type or NULL if not compatible. 
*/ static FusionCHPLegacyData * FromBase(FusionCHPData *chip); }; //////////////////////////////////////////////////////////////////// } // namespace //////////////////////////////////////////////////////////////////// #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCHPMultiDataAccessor.cpp0000644000175200017520000000675514516003651030367 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPMultiDataAccessor.h" // #include "calvin_files/fusion/src/FusionCHPData.h" #include "calvin_files/fusion/src/FusionCHPMultiDataData.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_io; using namespace std; FusionCHPMultiDataAccessor::FusionCHPMultiDataAccessor() { } FusionCHPMultiDataAccessor::~FusionCHPMultiDataAccessor() { } bool FusionCHPMultiDataAccessor::Initialize(const vector &chps) { // Clear the map snpNameIndexMap.clear(); // Store the chp file names. chpFileNames = chps; if (chps.size() == 0) return false; // Read the first chp file. 
FusionCHPData *chp = FusionCHPDataReg::Read(chpFileNames[0]); if (chp == NULL) return false; FusionCHPMultiDataData *mchp = FusionCHPMultiDataData::FromBase(chp); if (mchp == NULL) { delete chp; return false; } // Extract the probe set names int n = mchp->GetEntryCount(GenotypeMultiDataType); for (int i=0; iGetProbeSetName(GenotypeMultiDataType, i)] = i; // Close the file and return delete mchp; return true; } void FusionCHPMultiDataAccessor::ExtractData(const vector &snps, vector > &calls, vector > &confidences) { int nchps = (int)chpFileNames.size(); calls.resize(nchps); confidences.resize(nchps); // Create a vector of CHP file indicies given the SNP names. This is // done to reduce the number of lookups in the map object. int nsnps = (int)snps.size(); vector snpIndicies(nsnps); for (int isnp=0; isnpGetGenoCall(GenotypeMultiDataType, snpIndicies[isnp]); confidences[ichp][isnp] = mchp->GetGenoConfidence(GenotypeMultiDataType, snpIndicies[isnp]); } // Close the file delete mchp; } } affxparser/src/fusion/calvin_files/fusion/src/FusionCHPMultiDataAccessor.h0000644000175200017520000000423014516003651030016 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionCHPMultiDataAccessor_HEADER_ #define _FusionCHPMultiDataAccessor_HEADER_ #include // #include #include #include #include // namespace affymetrix_fusion_io { /*! This class provides a simple interface to extract SNP calls and confidence values from a multi data CHP file. */ class FusionCHPMultiDataAccessor { private: /*! The list of CHP file names to extract the data from. */ std::vector chpFileNames; /*! A map of SNP name to CHP file index. */ std::map snpNameIndexMap; public: /*! Constructor */ FusionCHPMultiDataAccessor(); /*! Destructor */ ~FusionCHPMultiDataAccessor(); /*! Initialize the map of SNP names to indicies. * @param chps The list of CHP files to extract data from. */ bool Initialize(const std::vector &chps); /*! Extract the calls and confidences for each input SNP. * @param snps The list of snps to extract data for. * @param calls A matrix to hold the calls. * @param confidences A matrix to hold the confidence values. */ void ExtractData(const std::vector &snps, std::vector > &calls, std::vector > &confidences); }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCHPMultiDataData.cpp0000644000175200017520000000413714516003651027466 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPMultiDataData.h" // #include "calvin_files/parsers/src/CHPMultiDataFileReader.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_io; /*! Used to register the CHP reader. */ FusionCHPMultiDataData::Reg FusionCHPMultiDataData::reg; /*! The class name. */ static AffymetrixGuidType ObjectName = "FusionCHPMultiDataData"; /* * Construct the class. */ FusionCHPMultiDataData::FusionCHPMultiDataData() { } /* * Clean up any used memory. */ FusionCHPMultiDataData::~FusionCHPMultiDataData() { Clear(); } /* * Just call read. */ bool FusionCHPMultiDataData::ReadHeader() { return Read(); } /* * Read the file. */ bool FusionCHPMultiDataData::Read() { CHPMultiDataFileReader reader; reader.SetFilename(filename); try { reader.Read(chpData); return true; } catch(...) { return false; } } /* * Convert to the CHP pointer. 
*/ FusionCHPMultiDataData * FusionCHPMultiDataData::FromBase(FusionCHPData *chip) { if (chip != NULL && chip->GetObjectName() == ObjectName) return (FusionCHPMultiDataData *)chip; return NULL; } AffymetrixGuidType FusionCHPMultiDataData::GetObjectName() { return ObjectName; } affxparser/src/fusion/calvin_files/fusion/src/FusionCHPMultiDataData.h0000644000175200017520000003045514516003651027135 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionCHPMultiDataData_HEADER_ #define _FusionCHPMultiDataData_HEADER_ /*! \file FusionCHPMultiDataData.h This file provides CHP file reading capabilities for multi-data CHP files.*/ #include "calvin_files/data/src/CHPMultiDataData.h" #include "calvin_files/fusion/src/FusionCHPData.h" // namespace affymetrix_fusion_io { /*! This class provides storage and reading capabilities for multi-data CHP files */ class FusionCHPMultiDataData : public FusionCHPData { private: /*! Constructor */ FusionCHPMultiDataData(); /*! The CHP object. */ affymetrix_calvin_io::CHPMultiDataData chpData; public: /*! Destructor */ ~FusionCHPMultiDataData(); /*! 
Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId() { return chpData.GetGenericData().FileIdentifier(); } /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData() { return &chpData.GetGenericData(); } /*! Returns the data set header. * @param dataType The data type. */ affymetrix_calvin_io::DataSetHeader *GetDataSetHeader(affymetrix_calvin_io::MultiDataType dataType) { return chpData.GetDataSetHeader(dataType); } /*! The data set information */ std::map &GetDataSetInfo() { return chpData.GetDataSetInfo(); } /*! The maximum length of a probe set name. * @param dataType The data type * @return The maximum probe set name length */ int GetMaxProbeSetName(affymetrix_calvin_io::MultiDataType dataType) { return chpData.GetMaxProbeSetName(dataType); } /*! Clears the members. */ void Clear() { chpData.Clear(); } /*! Gets the class name. */ affymetrix_calvin_utilities::AffymetrixGuidType GetObjectName(); /*! Gets the name of the algorithm. * @return The algorithm name. */ std::wstring GetAlgName() { return chpData.GetAlgName(); } /*! Gets the algorithm version. * @return The version. */ std::wstring GetAlgVersion() { return chpData.GetAlgVersion(); } /*! Sets the array type */ std::wstring GetArrayType() { return chpData.GetArrayType(); } /*! Gets the algorithm parameters * @return The algoirhtm parameters. */ ParameterNameValueTypeList GetAlgParams() { return chpData.GetAlgParams(); } /*! Gets the summary parameters * @return The summary parameters. */ ParameterNameValueTypeList GetSummaryParams() { return chpData.GetSummaryParams(); } /*! Gets the number of entries (probe sets) * @param dataType The data type */ int32_t GetEntryCount(affymetrix_calvin_io::MultiDataType dataType) { return chpData.GetEntryCount(dataType); } /*! Gets the probe set data. 
* @param dataType The data type * @param index The row index. * @param entry The genotype results. */ void GetGenotypeEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry) { chpData.GetGenotypeEntry(dataType, index, entry); } /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetCopyNumberEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData &entry) { chpData.GetCopyNumberEntry(dataType, index, entry); } /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetCopyNumberEntryLog2Ratio(affymetrix_calvin_io::MultiDataType dataType, int index, int log2RatioIndex, float *val) { chpData.GetCopyNumberEntryLog2Ratio(dataType, index, val); } /*! Gets the cyto region data. * @param dataType The data type * @param index The row index. * @param entry The cyto region results. */ void GetCytoRegionEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData &entry) { chpData.GetCytoEntry(dataType, index, entry); } /*! Gets the copy number variation region data. * @param dataType The data type * @param index The row index. * @param entry The cyto region results. */ void GetCopyNumberVariationRegionEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry) { chpData.GetCopyNumberVariationEntry(dataType, index, entry); } /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The expression results. 
*/ void GetExpressionEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry) { chpData.GetExpressionEntry(dataType, index, entry); } /*! Gets the call of the probe set. * @param dataType The data type * @param index The row index. * @return The call. */ u_int8_t GetGenoCall(affymetrix_calvin_io::MultiDataType dataType, int index) { return chpData.GetGenoCall(dataType, index); } /*! Gets the confidence in the call of the probe set. * @param dataType The data type * @param index The row index. * @return The confidence. */ float GetGenoConfidence(affymetrix_calvin_io::MultiDataType dataType, int index) { return chpData.GetGenoConfidence(dataType, index); } /*! Gets the quantification of the probe set. * @param dataType The data type * @param index The row index. * @return The quantification. */ float GetExpressionQuantification(affymetrix_calvin_io::MultiDataType dataType, int index) { return chpData.GetExpressionQuantification(dataType, index); } /*! Gets the chromosome segment data. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetChromosomeSegmentEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeSegmentData &entry) { chpData.GetChromosomeSegmentEntry(dataType, index, entry); } /*! Gets the chromosome segment data. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetChromosomeSegmentEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeSegmentDataEx &entry) { chpData.GetChromosomeSegmentEntry(dataType, index, entry); } /*! Gets the chromosome summary data. * @param dataType The data type * @param index The row index. * @param entry The results. 
*/ void GetChromosomeSummaryEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::ChromosomeMultiDataSummaryData &entry) { chpData.GetChromosomeSummaryEntry(dataType, index, entry); } /*! Gets the familial file entry. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetFamilialSegmentOverlapEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::FamilialSegmentOverlap &entry) { chpData.GetFamilialSegmentOverlapEntry(dataType, index, entry); } /*! Gets the familial file entry. * @param dataType The data type * @param index The row index. * @param entry The results. */ void GetFamilialSampleEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::FamilialSample &entry) { chpData.GetFamilialSampleEntry(dataType, index, entry); } /*! Gets the DMET probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetDmetCopyNumberEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::DmetCopyNumberData &entry) { chpData.GetEntry(dataType, index, entry); } /*! Gets the DMET probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetDmetMultiAllelicEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::DmetMultiAllelicData &entry) { chpData.GetEntry(dataType, index, entry); } /*! Gets the DMET probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetDmetBiAllelicEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::DmetBiAllelicData &entry) { chpData.GetEntry(dataType, index, entry); } /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. 
*/ void GetAllelePeakEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::AllelePeaks &entry) { chpData.GetEntry(dataType, index, entry); } /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The AB signals. */ void GetMarkerABSignalsEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::MarkerABSignals &entry) { chpData.GetEntry(dataType, index, entry); } /*! Gets the probe set data. * @param dataType The data type * @param index The row index. * @param entry The copy number results. */ void GetCytoGenotypeEntry(affymetrix_calvin_io::MultiDataType dataType, int index, affymetrix_calvin_data::CytoGenotypeCallData &entry) { chpData.GetEntry(dataType, index, entry); } /*! Get the probe set name. * @param dataType The data type * @param index The row index. * @return The probe set name. */ std::string GetProbeSetName(affymetrix_calvin_io::MultiDataType dataType, int index) { return chpData.GetProbeSetName(dataType, index); } /*! Get the length of the metric columns. * @param dataType The data type * @param col The column index (of the metric columns) * @return The length. */ int32_t GetMetricColumnLength(affymetrix_calvin_io::MultiDataType dataType, int col) { return chpData.GetMetricColumnLength(dataType, col); } /*! Get the length of the metric columns. * @param dataType The data type * @return The number of columns. */ int32_t GetNumMetricColumns(affymetrix_calvin_io::MultiDataType dataType) { return chpData.GetNumMetricColumns(dataType); } /*! Get the metric column name. * @param dataType The data type * @param colIndex the metric column index * @return The column name */ std::wstring GetMetricColumnName(affymetrix_calvin_io::MultiDataType dataType, int colIndex) { return chpData.GetMetricColumnName(dataType, colIndex); } private: /*! Reads the CHP file. * @return True if successful. */ bool Read(); /*! 
Reads the header of the CHP file * @return True if successful */ bool ReadHeader(); public: /*! A class to register the MultiData CHP reader. */ class Reg : public FusionCHPDataReg { public: /*! Constructor - registers CHP_MULTI_DATA_TYPE as the only file type id of this reader. NOTE(review): std::list is declared without template arguments below; they appear to have been lost when this text was extracted - confirm against the upstream header. */ Reg() : FusionCHPDataReg() { std::list ids; ids.push_back(CHP_MULTI_DATA_TYPE); SetFileTypeIds(ids); } /*! Creates a MultiData CHP object. * @return A FusionCHPMultiDataData allocated with new, returned through the base-class pointer. */ FusionCHPData *MakeObject() { return new FusionCHPMultiDataData; } }; public: /*! The one and only registration object. This registers the class as a CHP reader. */ static Reg reg; /*! Converts the type to the MultiData CHP type. * @param chip The pointer to the CHP data object. * @return The MultiData CHP data type or NULL if not compatible. */ static FusionCHPMultiDataData * FromBase(FusionCHPData *chip); }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCHPQuantificationData.cpp0000644000175200017520000000507114516003651030556 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPQuantificationData.h" // #include "calvin_files/parsers/src/CHPQuantificationFileReader.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_io; /*! Used to register the CHP reader. */ FusionCHPQuantificationData::Reg FusionCHPQuantificationData::reg; /*! The class name. */ static AffymetrixGuidType ObjectName = "FusionCHPQuantificationData"; /* * Construct the class. */ FusionCHPQuantificationData::FusionCHPQuantificationData() { } /* * Clean up any used memory. */ FusionCHPQuantificationData::~FusionCHPQuantificationData() { chpData.Clear(); } /* * Just call read. */ bool FusionCHPQuantificationData::ReadHeader() { return Read(); } /* * Read the file. */ bool FusionCHPQuantificationData::Read() { CHPQuantificationFileReader reader; reader.SetFilename(filename); try { reader.Read(chpData); return true; } catch(...) { return false; } } /* * Convert to the CHP pointer. */ FusionCHPQuantificationData * FusionCHPQuantificationData::FromBase(FusionCHPData *chip) { if (chip != NULL && chip->GetObjectName() == ObjectName) return (FusionCHPQuantificationData *)chip; return NULL; } AffymetrixGuidType FusionCHPQuantificationData::GetObjectName() { return ObjectName; } /* * Get the id of the file (only valid for Command Console "calvin" files) */ AffymetrixGuidType FusionCHPQuantificationData::FileId() { return chpData.GetGenericData().FileIdentifier(); } /* * Returns the GenericData object associated with a Calvin file, NULL for GCOS files. 
*/ GenericData *FusionCHPQuantificationData::GetGenericData() { return &chpData.GetGenericData(); } affxparser/src/fusion/calvin_files/fusion/src/FusionCHPQuantificationData.h0000644000175200017520000001021214516003651030214 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionCHPQuantificationData_HEADER_ #define _FusionCHPQuantificationData_HEADER_ /*! \file FusionCHPQuantificationData.h This file provides CHP file reading capabilities for CHP files with a single quantification value.*/ #include "calvin_files/data/src/CHPQuantificationData.h" #include "calvin_files/fusion/src/FusionCHPData.h" // namespace affymetrix_fusion_io { /*! This class provides storage and reading capabilities for quantification CHP files */ class FusionCHPQuantificationData : public FusionCHPData { private: /*! Constructor (private; the nested Reg class creates instances in MakeObject()). */ FusionCHPQuantificationData(); /*! The CHP object that every accessor of this class forwards to. */ affymetrix_calvin_io::CHPQuantificationData chpData; public: /*! Destructor */ ~FusionCHPQuantificationData(); /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId(); /*!
Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData(); /*! Returns the data set header (a reference obtained from chpData). */ affymetrix_calvin_io::DataSetHeader &GetDataSetHeader() { return chpData.GetDataSetHeader(); } /*! Gets the class name. */ affymetrix_calvin_utilities::AffymetrixGuidType GetObjectName(); /*! Gets the name of the algorithm. * @return The algorithm name. */ std::wstring GetAlgName() { return chpData.GetAlgName(); } /*! Gets the algorithm version. * @return The version. */ std::wstring GetAlgVersion() { return chpData.GetAlgVersion(); } /*! Gets the array type. * @return The array type. */ std::wstring GetArrayType() { return chpData.GetArrayType(); } /*! Gets the algorithm parameters * @return The algorithm parameters. */ ParameterNameValueTypeList GetAlgParams() { return chpData.GetAlgParams(); } /*! Gets the summary parameters * @return The summary parameters. */ ParameterNameValueTypeList GetSummaryParams() { return chpData.GetSummaryParams(); } /*! Gets the number of entries (probe sets) * @return The entry count reported by chpData. */ int32_t GetEntryCount() { return chpData.GetEntryCount(); } /*! Gets the quantification entry of one row. * @param index The row index. * @param entry The quantification value. */ void GetQuantificationEntry(int index, affymetrix_calvin_data::ProbeSetQuantificationData &entry) { chpData.GetQuantificationEntry(index, entry); } private: /*! Reads the CHP file. * @return True if successful. */ bool Read(); /*! Reads the header of the CHP file * @return True if successful */ bool ReadHeader(); public: /*! A class to register the quantification CHP reader. */ class Reg : public FusionCHPDataReg { public: /*! Constructor - registers CHP_QUANTIFICATION_TYPE as the only file type id of this reader. NOTE(review): std::list is declared without template arguments below; they appear to have been lost when this text was extracted - confirm against the upstream header. */ Reg() : FusionCHPDataReg() { std::list ids; ids.push_back(CHP_QUANTIFICATION_TYPE); SetFileTypeIds(ids); } /*! Creates a quantification CHP object. * @return A FusionCHPQuantificationData allocated with new, returned through the base-class pointer. */ FusionCHPData *MakeObject() { return new FusionCHPQuantificationData; } }; /*!
The one and only registration object. This registers the class as a CHP reader. */ static Reg reg; public: /*! Converts the type to the quantification CHP type. * @param chip The pointer to the CHP data object. * @return The quantification CHP data type or NULL if not compatible. */ static FusionCHPQuantificationData * FromBase(FusionCHPData *chip); }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCHPQuantificationDetectionData.cpp0000644000175200017520000000533314516003651032416 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPQuantificationDetectionData.h" // #include "calvin_files/parsers/src/CHPQuantificationDetectionFileReader.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_io; /*! Used to register the CHP reader. */ FusionCHPQuantificationDetectionData::Reg FusionCHPQuantificationDetectionData::reg; /*! The class name; this is the value returned by GetObjectName() and compared against in FromBase(). */ static AffymetrixGuidType ObjectName = "FusionCHPQuantificationDetectionData"; /* * Construct the class.
*/ FusionCHPQuantificationDetectionData::FusionCHPQuantificationDetectionData() { } /* * Clean up any used memory. */ FusionCHPQuantificationDetectionData::~FusionCHPQuantificationDetectionData() { chpData.Clear(); } /* * Just call read. */ bool FusionCHPQuantificationDetectionData::ReadHeader() { return Read(); } /* * Read the file. */ bool FusionCHPQuantificationDetectionData::Read() { CHPQuantificationDetectionFileReader reader; reader.SetFilename(filename); try { reader.Read(chpData); return true; } catch(...) { return false; } } /* * Convert to the CHP pointer. */ FusionCHPQuantificationDetectionData * FusionCHPQuantificationDetectionData::FromBase(FusionCHPData *chip) { if (chip != NULL && chip->GetObjectName() == ObjectName) return (FusionCHPQuantificationDetectionData *)chip; return NULL; } AffymetrixGuidType FusionCHPQuantificationDetectionData::GetObjectName() { return ObjectName; } /* * Get the id of the file (only valid for Command Console "calvin" files) */ AffymetrixGuidType FusionCHPQuantificationDetectionData::FileId() { return chpData.GetGenericData().FileIdentifier(); } /* * Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ GenericData *FusionCHPQuantificationDetectionData::GetGenericData() { return &chpData.GetGenericData(); } affxparser/src/fusion/calvin_files/fusion/src/FusionCHPQuantificationDetectionData.h0000644000175200017520000001053314516003651032061 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionCHPQuantificationDetectionData_HEADER_ #define _FusionCHPQuantificationDetectionData_HEADER_ /*! \file FusionCHPQuantificationDetectionData.h This file provides CHP file reading capabilities for CHP files with a quantification/detection pair.*/ #include "calvin_files/data/src/CHPQuantificationDetectionData.h" #include "calvin_files/fusion/src/FusionCHPData.h" // namespace affymetrix_fusion_io { /*! This class provides storage and reading capabilities for quantification/detection CHP files */ class FusionCHPQuantificationDetectionData : public FusionCHPData { private: /*! Constructor (declared private). */ FusionCHPQuantificationDetectionData(); /*! The CHP object that the inline accessors of this class forward to. */ affymetrix_calvin_io::CHPQuantificationDetectionData chpData; public: /*! Destructor */ ~FusionCHPQuantificationDetectionData(); /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId(); /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData(); /*! Returns the data set header (a reference obtained from chpData). */ affymetrix_calvin_io::DataSetHeader &GetDataSetHeader() { return chpData.GetDataSetHeader(); } /*! Gets the class name. */ affymetrix_calvin_utilities::AffymetrixGuidType GetObjectName(); /*! Gets the name of the algorithm. * @return The algorithm name. */ std::wstring GetAlgName() { return chpData.GetAlgName(); } /*! Gets the algorithm version. * @return The version. */ std::wstring GetAlgVersion() { return chpData.GetAlgVersion(); } /*!
Gets the array type. * @return The array type. */ std::wstring GetArrayType() { return chpData.GetArrayType(); } /*! Gets the algorithm parameters * @return The algorithm parameters. */ ParameterNameValueTypeList GetAlgParams() { return chpData.GetAlgParams(); } /*! Gets the summary parameters * @return The summary parameters. */ ParameterNameValueTypeList GetSummaryParams() { return chpData.GetSummaryParams(); } /*! Gets the number of entries (probe sets) * @return The entry count reported by chpData. */ int32_t GetEntryCount() { return chpData.GetEntryCount(); } /*! Gets the quantification/detection entry of one row. * @param index The row index. * @param entry The quantification/detection value. */ void GetQuantificationDetectionEntry(int index, affymetrix_calvin_data::ProbeSetQuantificationDetectionData &entry) { chpData.GetQuantificationDetectionEntry(index, entry); } private: /*! Reads the CHP file. * @return True if successful. */ bool Read(); /*! Reads the header of the CHP file * @return True if successful */ bool ReadHeader(); public: /*! A class to register the quantification/detection CHP reader. */ class Reg : public FusionCHPDataReg { public: /*! Constructor - registers CHP_QUANTIFICATION_DETECTION_TYPE as the only file type id of this reader. NOTE(review): std::list is declared without template arguments below; they appear to have been lost when this text was extracted - confirm against the upstream header. */ Reg() : FusionCHPDataReg() { std::list ids; ids.push_back(CHP_QUANTIFICATION_DETECTION_TYPE); SetFileTypeIds(ids); } /*! Creates a quantification/detection CHP object. * @return A FusionCHPQuantificationDetectionData allocated with new, returned through the base-class pointer. */ FusionCHPData *MakeObject() { return new FusionCHPQuantificationDetectionData; } }; /*! The one and only registration object. This registers the class as a CHP reader. */ static Reg reg; public: /*! Converts the type to the quantification/detection CHP type. * @param chip The pointer to the CHP data object. * @return The quantification/detection CHP data type or NULL if not compatible.
*/ static FusionCHPQuantificationDetectionData * FromBase(FusionCHPData *chip); }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCHPTilingData.cpp0000644000175200017520000000465114516003651027031 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionCHPTilingData.h" // #include "calvin_files/parsers/src/CHPTilingFileReader.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_exceptions; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_io; /*! Used to register the CHP reader. */ FusionCHPTilingData::Reg FusionCHPTilingData::reg; /*! The class name. */ static AffymetrixGuidType ObjectName = "FusionCHPTilingData"; /* * Construct the class. */ FusionCHPTilingData::FusionCHPTilingData() { } /* * Clean up any used memory. */ FusionCHPTilingData::~FusionCHPTilingData() { chpData.Clear(); } /* * Just call read. */ bool FusionCHPTilingData::ReadHeader() { return Read(); } /* * Read the file. */ bool FusionCHPTilingData::Read() { CHPTilingFileReader reader; reader.SetFilename(filename); try { reader.Read(chpData); return true; } catch(...) 
{ return false; } } /* * Convert to the CHP pointer. */ FusionCHPTilingData * FusionCHPTilingData::FromBase(FusionCHPData *chip) { if (chip != NULL && chip->GetObjectName() == ObjectName) return (FusionCHPTilingData *)chip; return NULL; } AffymetrixGuidType FusionCHPTilingData::GetObjectName() { return ObjectName; } /* * Get the id of the file (only valid for Command Console "calvin" files) */ AffymetrixGuidType FusionCHPTilingData::FileId() { return chpData.GetGenericData().FileIdentifier(); } /* * Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ GenericData *FusionCHPTilingData::GetGenericData() { return &chpData.GetGenericData(); } affxparser/src/fusion/calvin_files/fusion/src/FusionCHPTilingData.h0000644000175200017520000001035414516003651026473 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #if !defined _AffymetrixFusionTilingCHPData_HEADER_ #define _AffymetrixFusionTilingCHPData_HEADER_ /*! 
\file FusionCHPTilingData.h This file provides tiling CHP file reading capabilities.*/ #include "calvin_files/data/src/CHPTilingData.h" #include "calvin_files/fusion/src/FusionCHPData.h" // namespace affymetrix_fusion_io { /*! This class provides storage and reading capabilities for tiling CHP files */ class FusionCHPTilingData : public FusionCHPData { private: /*! Constructor (declared private). */ FusionCHPTilingData(); /*! The CHP object that the inline accessors of this class forward to. */ affymetrix_calvin_io::CHPTilingData chpData; public: /*! Destructor */ ~FusionCHPTilingData(); /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId(); /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData(); /*! Gets the class name. */ affymetrix_calvin_utilities::AffymetrixGuidType GetObjectName(); /*! Gets the number of sequences stored in the file. * @return The number of sequences stored in the file. */ int GetNumberSequences() { return chpData.GetNumberSequences(); } /*! Gets the name of the algorithm. * @return The algorithm name. */ std::wstring GetAlgName() { return chpData.GetAlgName(); } /*! Gets the algorithm version. * @return The version. */ std::wstring GetAlgVersion() { return chpData.GetAlgVersion(); } /*! Gets the algorithm parameters * @return The algorithm parameters. */ ParameterNameValueTypeList GetAlgParams() { return chpData.GetAlgParams(); } /*! Gets the sequence data. * @return The data associated with the sequence. */ TilingSequenceData GetTilingSequenceData() { return chpData.GetTilingSequenceData(); } /*! Gets the number of entries in a tiling sequence. * @param index The sequence index. * @return The number of entries in the sequence. */ int32_t GetTilingSequenceEntryCount(int index) { return chpData.GetTilingSequenceEntryCount(index); } /*! Opens a group for reading. * @param index The index to the sequence.
*/ void OpenTilingSequenceDataSet(int index) { chpData.OpenTilingSequenceDataSet(index); } /*! Returns the entry for the given row. The data set must be open. * @param row The row index. * @param e The entry. */ void GetTilingSequenceEntry(int row, affymetrix_calvin_io::CHPTilingEntry& e) { chpData.GetTilingSequenceEntry(row, e); } private: /*! Reads the CHP file. * @return True if successful. */ bool Read(); /*! Reads the header of the CHP file * @return True if successful */ bool ReadHeader(); /*! A class to register the tiling CHP reader. It is declared in the private section of the enclosing class. */ class Reg : public FusionCHPDataReg { public: /*! Constructor - registers CHP_TILING_TYPE as the only file type id of this reader. NOTE(review): std::list is declared without template arguments below; they appear to have been lost when this text was extracted - confirm against the upstream header. */ Reg() : FusionCHPDataReg() { std::list ids; ids.push_back(CHP_TILING_TYPE); SetFileTypeIds(ids); } /*! Creates a tiling CHP object. * @return A FusionCHPTilingData allocated with new, returned through the base-class pointer. */ FusionCHPData *MakeObject() { return new FusionCHPTilingData; } }; /*! The one and only registration object. This registers the class as a CHP reader. */ static Reg reg; public: /*! Converts the type to the tiling CHP type. * @param chip The pointer to the CHP data object. * @return The tiling CHP data type or NULL if not compatible. */ static FusionCHPTilingData * FromBase(FusionCHPData *chip); }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionCoords.h0000644000175200017520000000413614516003651025352 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionCoords_HEADER_ #define _FusionCoords_HEADER_ /*! \file FusionCoords.h This file defines the fusion coordinate classes. NOTE(review): the header names of the four #include directives that follow appear to be missing (possibly lost when this text was extracted) - confirm against the upstream header. */ #include #include #include #include // namespace affymetrix_fusion_io { /*! Defines a floating-point location */ struct FPoint { /*! The x coordinate */ float x; /*! The y coordinate */ float y; /*! Equality test: exact (==) comparison of both coordinates. The parameter is the right-hand operand even though it is named lhs. */ bool operator==(const FPoint& lhs)const { return (x==lhs.x && y==lhs.y); } /*! Inequality test: the negation of operator==. */ bool operator!=(const FPoint& lhs)const { return !(*this==lhs); } }; /*! A vector of points. NOTE(review): the element type of std::vector appears to be missing (possibly lost in extraction; presumably FPoint) - confirm against the upstream header. */ typedef std::vector FPointVector; /*! Defines a floating point rectangle */ struct FGridCoords { /*! Default constructor: sets all four corners to (0, 0). */ FGridCoords() { upperleft.x = 0.0f, upperleft.y = 0.0f; upperright = lowerright = lowerleft = upperleft; } /*! Tests if the rectangle is empty, i.e. upperleft equals upperright, lowerleft equals lowerright and upperleft equals lowerleft, so that all four corners coincide. */ bool IsEmpty() const { if (upperleft == upperright && lowerleft == lowerright && upperleft == lowerleft) return true; return false; } /*! The upper left coordinate */ FPoint upperleft; /*! The upper right coordinate */ FPoint upperright; /*! The lower right coordinate */ FPoint lowerright; /*! The lower left coordinate */ FPoint lowerleft; }; } #endif // _FusionCoords_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionGRDData.cpp0000644000175200017520000000166414516003651025665 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionGRDData.h" // using namespace affymetrix_fusion_io; affxparser/src/fusion/calvin_files/fusion/src/FusionGRDData.h0000644000175200017520000000225614516003651025330 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionGRDData_HEADER_ #define _AffymetrixFusionGRDData_HEADER_ /*! \file FusionGRDData.h This file defines the Fusion GRD Data classes */ #include "file/GRDFileData.h" // using namespace affxgrd; namespace affymetrix_fusion_io { /*! Storage for the GRD file. 
*/ class FusionGRDFile : public CGRDFileData { }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionGrcFileReader.cpp0000644000175200017520000000355514516003651027116 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionGrcFileReader.h" // #include "calvin_files/parsers/src/GridControlFileReader.h" // #include "file/GRCFileData.h" // using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_io; using namespace affymetrix_grid_control; using namespace affymetrix_calvin_exceptions; using namespace affxgrc; using namespace std; /* * Clear class members if needed. */ FusionGrcFileReader::FusionGrcFileReader() { } /* * Clear class members if needed. */ FusionGrcFileReader::~FusionGrcFileReader() { } /* * Read the GRC file. First try reading it as a Calvin file * then as a GCOS GRC file. 
*/ bool FusionGrcFileReader::Read(const string &fileName, GridControlData& data) { GridControlFileReader calvinGrc; try { calvinGrc.Read(fileName, data); return true; } catch(InvalidFileTypeException e) { CGRCFileData gcosGrc; try { gcosGrc.SetFileName(fileName.c_str()); if (gcosGrc.Read(data) == true) return true; } catch(...) { } } catch (...) { } return false; } affxparser/src/fusion/calvin_files/fusion/src/FusionGrcFileReader.h0000644000175200017520000000326614516003651026562 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionGrcFile_HEADER_ #define _AffymetrixFusionGrcFile_HEADER_ /*! \file FusionGrcFileReader.h This file provides fusion classes for reading GRC files. */ #include "file/GridControlData.h" // namespace affymetrix_fusion_io { /*! This class provides GRC file reading capabilities. * * This class will read either the GCOS or Calvin format GRC files. */ class FusionGrcFileReader { public: /*! Constructor */ FusionGrcFileReader(); /*! Destructor */ ~FusionGrcFileReader(); /*! Reads the contents of a GRC file. * @param fileName The name of the file to read. 
* @param data A reference to an object that will receive information from the file. * @return True if successful. */ bool Read(const std::string &fileName, affymetrix_grid_control::GridControlData& data); }; }; #endif // _AffymetrixFusionGrcFile_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/FusionMSKData.cpp0000644000175200017520000000166414516003651025703 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionMSKData.h" // using namespace affymetrix_fusion_io; affxparser/src/fusion/calvin_files/fusion/src/FusionMSKData.h0000644000175200017520000000225614516003651025346 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionMSKData_HEADER_ #define _AffymetrixFusionMSKData_HEADER_ /*! \file FusionMSKData.h This file defines the Fusion MSK Data classes */ #include "file/MSKFileData.h" // using namespace affxmsk; namespace affymetrix_fusion_io { /*! Storage for the MSK file. */ class FusionMSKFile : public CMSKFileData { }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionPSIData.cpp0000644000175200017520000000166414516003651025704 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionPSIData.h" // using namespace affymetrix_fusion_io; affxparser/src/fusion/calvin_files/fusion/src/FusionPSIData.h0000644000175200017520000000225614516003651025347 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionPSIData_HEADER_ #define _AffymetrixFusionPSIData_HEADER_ /*! \file FusionPSIData.h This file defines the Fusion PSI Data classes */ #include "file/PSIFileData.h" // using namespace affxpsi; namespace affymetrix_fusion_io { /*! Storage for the PSI file. */ class FusionPSIFile : public CPSIFileData { }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionProbeSetResults.cpp0000644000175200017520000002605714516003651027567 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionProbeSetResults.h" // #include "file/CHPFileData.h" // using namespace affymetrix_fusion_io; FusionExpressionProbeSetResults::FusionExpressionProbeSetResults() { Clear(); } FusionExpressionProbeSetResults::~FusionExpressionProbeSetResults() {} FusionExpressionProbeSetResults FusionExpressionProbeSetResults::operator=(FusionExpressionProbeSetResults &src) { SetDetectionPValue(src.GetDetectionPValue()); SetSignal(src.GetSignal()); SetNumPairs(src.GetNumPairs()); SetNumUsedPairs(src.GetNumUsedPairs()); SetDetection(src.GetDetection()); SetHasCompResults(src.HasCompResults()); SetChangePValue(src.GetChangePValue()); SetSignalLogRatio(src.GetSignalLogRatio()); SetSignalLogRatioLow(src.GetSignalLogRatioLow()); SetSignalLogRatioHigh(src.GetSignalLogRatioHigh()); SetNumCommonPairs(src.GetNumCommonPairs()); SetChange(src.GetChange()); return *this; } void FusionExpressionProbeSetResults::Clear() { SetDetectionPValue(0.0); SetSignal(0.0); SetNumPairs(0); SetNumUsedPairs(0); SetDetection(0); SetHasCompResults(false); SetChangePValue(0.0); SetSignalLogRatio(0.0); SetSignalLogRatioLow(0.0); SetSignalLogRatioHigh(0.0); SetNumCommonPairs(0); SetChange(0); } float 
FusionExpressionProbeSetResults::GetDetectionPValue() const { return detectionPValue; } float FusionExpressionProbeSetResults::GetSignal() const { return signal; } u_int16_t FusionExpressionProbeSetResults::GetNumPairs() const { return numPairs; } u_int16_t FusionExpressionProbeSetResults::GetNumUsedPairs() const { return numPairsUsed; } u_int8_t FusionExpressionProbeSetResults::GetDetection() const { return detection; } bool FusionExpressionProbeSetResults::HasCompResults() const { return hasCompResults; } float FusionExpressionProbeSetResults::GetChangePValue() const { return changePValue; } float FusionExpressionProbeSetResults::GetSignalLogRatio() const { return sigLogRatio; } float FusionExpressionProbeSetResults::GetSignalLogRatioLow() const { return sigLogRatioLo; } float FusionExpressionProbeSetResults::GetSignalLogRatioHigh() const { return sigLogRatioHi; } u_int16_t FusionExpressionProbeSetResults::GetNumCommonPairs() const { return numCommonPairs; } u_int8_t FusionExpressionProbeSetResults::GetChange() const { return change; } void FusionExpressionProbeSetResults::SetDetectionPValue(float p) { detectionPValue = p; } void FusionExpressionProbeSetResults::SetSignal(float p) { signal = p; } void FusionExpressionProbeSetResults::SetNumPairs(u_int16_t p) { numPairs = p; } void FusionExpressionProbeSetResults::SetNumUsedPairs(u_int16_t p) { numPairsUsed = p; } void FusionExpressionProbeSetResults::SetDetection(u_int8_t p) { detection = p; } void FusionExpressionProbeSetResults::SetHasCompResults(bool p) { hasCompResults = p; } void FusionExpressionProbeSetResults::SetChangePValue(float p) { changePValue = p; } void FusionExpressionProbeSetResults::SetSignalLogRatio(float p) { sigLogRatio = p; } void FusionExpressionProbeSetResults::SetSignalLogRatioLow(float p) { sigLogRatioLo = p; } void FusionExpressionProbeSetResults::SetSignalLogRatioHigh(float p) { sigLogRatioHi = p; } void FusionExpressionProbeSetResults::SetNumCommonPairs(u_int16_t p) { numCommonPairs = 
p; } void FusionExpressionProbeSetResults::SetChange(u_int8_t p) { change = p; } std::string FusionExpressionProbeSetResults::GetDetectionString() { switch (GetDetection()) { case ABS_PRESENT_CALL: return "P"; break; case ABS_MARGINAL_CALL: return "M"; break; case ABS_ABSENT_CALL: return "A"; break; case ABS_NO_CALL: return "No Call"; break; default: break; } return ""; } std::string FusionExpressionProbeSetResults::GetChangeString() { switch (GetChange()) { case COMP_INCREASE_CALL: return "I"; break; case COMP_DECREASE_CALL: return "D"; break; case COMP_MOD_INCREASE_CALL: return "MI"; break; case COMP_MOD_DECREASE_CALL: return "MD"; break; case COMP_NO_CHANGE_CALL: return "NC"; break; case COMP_NO_CALL: return "No Call"; break; default: break; } return ""; } //////////////////////////////////////////////////////////////////// FusionGenotypeProbeSetResults::FusionGenotypeProbeSetResults() { Clear(); } FusionGenotypeProbeSetResults::~FusionGenotypeProbeSetResults() {} FusionGenotypeProbeSetResults FusionGenotypeProbeSetResults::operator=(FusionGenotypeProbeSetResults &src) { SetAlleleCall(src.GetAlleleCall()); SetConfidence(src.GetConfidence()); SetRAS1(src.GetRAS1()); SetRAS2(src.GetRAS2()); SetPValueAA(src.GetPValueAA()); SetPValueAB(src.GetPValueAB()); SetPValueBB(src.GetPValueBB()); SetPValueNoCall(src.GetPValueNoCall()); return *this; } void FusionGenotypeProbeSetResults::Clear() { SetAlleleCall(0); SetConfidence(0.0); SetRAS1(0.0); SetRAS2(0.0); SetPValueAA(0.0); SetPValueAB(0.0); SetPValueBB(0.0); SetPValueNoCall(0.0); } u_int8_t FusionGenotypeProbeSetResults::GetAlleleCall() { return alleleCall; } float FusionGenotypeProbeSetResults::GetConfidence() { return confidence; } float FusionGenotypeProbeSetResults::GetRAS1() { return RAS1; } float FusionGenotypeProbeSetResults::GetRAS2() { return RAS2; } float FusionGenotypeProbeSetResults::GetPValueAA() { return pValueAA; } float FusionGenotypeProbeSetResults::GetPValueAB() { return pValueAB; } float 
FusionGenotypeProbeSetResults::GetPValueBB() { return pValueBB; } float FusionGenotypeProbeSetResults::GetPValueNoCall() { return pValueNoCall; } void FusionGenotypeProbeSetResults::SetAlleleCall(u_int8_t p) { alleleCall = p; } void FusionGenotypeProbeSetResults::SetConfidence(float p) { confidence = p; } void FusionGenotypeProbeSetResults::SetRAS1(float p) { RAS1 = p; } void FusionGenotypeProbeSetResults::SetRAS2(float p) { RAS2 = p; } void FusionGenotypeProbeSetResults::SetPValueAA(float p) { pValueAA = p; } void FusionGenotypeProbeSetResults::SetPValueAB(float p) { pValueAB = p; } void FusionGenotypeProbeSetResults::SetPValueBB(float p) { pValueBB = p; } void FusionGenotypeProbeSetResults::SetPValueNoCall(float p) { pValueNoCall = p; } std::string FusionGenotypeProbeSetResults::GetAlleleCallString() { switch (GetAlleleCall()) { case ALLELE_A_CALL: return "A"; break; case ALLELE_B_CALL: return "B"; break; case ALLELE_AB_CALL: return "AB";break; default: return "No Call"; break; } return ""; } //////////////////////////////////////////////////////////////////// float FusionUniversalProbeSetResults::GetBackground() const { return background; } void FusionUniversalProbeSetResults::SetBackground(float bg) { background = bg; } void FusionUniversalProbeSetResults::Clear() { background = 0.0; } FusionUniversalProbeSetResults FusionUniversalProbeSetResults::operator=(FusionUniversalProbeSetResults &src) { SetBackground(src.GetBackground()); return *this; } FusionUniversalProbeSetResults::FusionUniversalProbeSetResults() { Clear(); } FusionUniversalProbeSetResults::~FusionUniversalProbeSetResults() {} //////////////////////////////////////////////////////////////////// FusionForceCallType::FusionForceCallType() { Clear(); } FusionForceCallType::FusionForceCallType(int32_t p, int8_t c, u_int8_t r) { position = p; call = c; reason = r; } FusionForceCallType::~FusionForceCallType() { Clear(); } void FusionForceCallType::Clear() { position = 0; call = 0; reason = 0; } 
//////////////////////////////////////////////////////////////////// FusionBaseCallType::FusionBaseCallType() { Clear(); } FusionBaseCallType::FusionBaseCallType(int32_t p, int8_t c) { position = p; call = c; } FusionBaseCallType::~FusionBaseCallType() { Clear(); } void FusionBaseCallType::Clear() { position = 0; call = 0; } //////////////////////////////////////////////////////////////////// FusionResequencingResults::FusionResequencingResults() { Clear(); } FusionResequencingResults::~FusionResequencingResults() { Clear(); } void FusionResequencingResults::Clear() { calledBases.clear(); scores.clear(); forceCalls.clear(); origCalls.clear(); } const Int8Vector& FusionResequencingResults::GetCalledBases() { return calledBases; } int8_t FusionResequencingResults::GetCalledBase(int index) { return calledBases[index]; } int32_t FusionResequencingResults::GetCalledBasesSize() const { return (int32_t)calledBases.size(); } void FusionResequencingResults::ResizeCalledBases(int32_t size) { calledBases.resize(size); } void FusionResequencingResults::SetCalledBase(int32_t index, int8_t call) { calledBases[index] = call; } void FusionResequencingResults::AddCalledBase(int8_t call) { calledBases.push_back(call); } const FloatVector& FusionResequencingResults::GetScores() { return scores; } float FusionResequencingResults::GetScore(int32_t index) { return scores[index]; } int32_t FusionResequencingResults::GetScoresSize() const { return (int32_t)scores.size(); } void FusionResequencingResults::ResizeScores(int32_t size) { scores.resize(size); } void FusionResequencingResults::SetScore(int index, float score) { scores[index] = score; } void FusionResequencingResults::AddScore(float score) { scores.push_back(score); } const FusionForceCallVector& FusionResequencingResults::GetForceCalls() { return forceCalls; } FusionForceCallType FusionResequencingResults::GetForceCall(int32_t index) { return forceCalls[index]; } int32_t FusionResequencingResults::GetForceCallsSize() const { 
return (int32_t)forceCalls.size(); } void FusionResequencingResults::ResizeForceCalls(int32_t size) { forceCalls.resize(size); } void FusionResequencingResults::SetForceCall(int32_t index, FusionForceCallType call) { forceCalls[index] = call; } void FusionResequencingResults::AddForceCall(FusionForceCallType call) { forceCalls.push_back(call); } const FusionBaseCallVector& FusionResequencingResults::GetOrigCalls() { return origCalls; } FusionBaseCallType FusionResequencingResults::GetOrigCall(int32_t index) { return origCalls[index]; } int32_t FusionResequencingResults::GetOrigCallsSize() const { return (int32_t)origCalls.size(); } void FusionResequencingResults::ResizeOrigCalls(int32_t size) { origCalls.resize(size); } void FusionResequencingResults::SetOrigCall(int32_t index, FusionBaseCallType call) { origCalls[index] = call; } void FusionResequencingResults::AddOrigCall(FusionBaseCallType call) { origCalls.push_back(call); } affxparser/src/fusion/calvin_files/fusion/src/FusionProbeSetResults.h0000644000175200017520000003733414516003651027234 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionProbeSetResults_HEADER_ #define _FusionProbeSetResults_HEADER_ ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable: 4786) // identifier was truncated in the debug information #endif ////////////////////////////////////////////////////////////////////// #include "calvin_files/data/src/CHPExpressionEntry.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffyStlCollectionTypes.h" // #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affymetrix_fusion_io { //////////////////////////////////////////////////////////////////// /*! Provides a base class for probe set results */ class FusionProbeSetResults { public: /*! Constructor */ FusionProbeSetResults() {}; /*! Destructor */ virtual ~FusionProbeSetResults() {}; }; //////////////////////////////////////////////////////////////////// /*! Expression analysis probe set results for the MAS5 algorithm */ class FusionExpressionProbeSetResults : public FusionProbeSetResults { private: /*! The detection p-value */ float detectionPValue; /*! The signal value */ float signal; /*! The number of probe pairs in the set */ u_int16_t numPairs; /*! The number of probe pairs used to calculate the signal value */ u_int16_t numPairsUsed; /*! The detection call */ u_int8_t detection; /*! Flag indicating that comparison results exist */ bool hasCompResults; /*! The change p-value */ float changePValue; /*! The signal log ratio */ float sigLogRatio; /*! The signal log ratio low value */ float sigLogRatioLo; /*! The signal log ratio high value */ float sigLogRatioHi; /*! 
The number of probe pairs in common between the experiment and baseline data */ u_int16_t numCommonPairs; /*! The change call */ u_int8_t change; public: /*! \brief Clears the members. */ void Clear(); /*! \brief Gets detection pvalue * \return Detection pvalue. */ float GetDetectionPValue() const; /*! \brief Gets the signal. * \return Signal. */ float GetSignal() const; /*! \brief gets the number of pairs. * \return Number of pairs. */ u_int16_t GetNumPairs() const; /*! \brief Gets the number used pairs. * \return Number of used pairs. */ u_int16_t GetNumUsedPairs() const; /*! \brief Gets the detection. * \return Detection. */ u_int8_t GetDetection() const; /*! \brief Determines if comp results exsist. * \return True if comp results exsist. */ bool HasCompResults() const; /*! \brief Gets the change pvalue. * \return Cahnge pvalue. */ float GetChangePValue() const; /*! \brief Gets the signal log ratio. * \return Signal log ratio. */ float GetSignalLogRatio() const; /*! \brief Gets the signal log ratio low * \return Signal log ratio low. */ float GetSignalLogRatioLow() const; /*! \brief Gets the signal log ratio high * \return Signal log ratio high. */ float GetSignalLogRatioHigh() const; /*! \brief Gets the number of common pairs * \return Number of common pairs. */ u_int16_t GetNumCommonPairs() const; /*! \brief Gets the change. * \return Change. */ u_int8_t GetChange() const; /*! \brief Sets the detection pvalue. * \param p pvalue to set. */ void SetDetectionPValue(float p); /*! \brief Sets the signal. * \param p Signal to set. */ void SetSignal(float p); /*! \brief Sets the number of pairs. * \param p Number of pairs. */ void SetNumPairs(u_int16_t p); /*! \brief Sets the number of used pairs. * \param p Number of used pairs to set. */ void SetNumUsedPairs(u_int16_t p); /*! \brief Sets detection. * \param p Detection to set. */ void SetDetection(u_int8_t p); /*! \brief Sets whether has comp results. * \param p Flag whether comp results exsists. 
*/ void SetHasCompResults(bool p); /*! \brief Sets the change pvalue. * \param p Change pvalue to set. */ void SetChangePValue(float p); /*! \brief Sets the signal log ratio. * \param p Signal log ratio. */ void SetSignalLogRatio(float p); /*! \brief Sets the signal log ratio low. * \param p Signal ratio low. */ void SetSignalLogRatioLow(float p); /*! \brief Sets the signal log ratio high * \param p Signal log ratio high to set. */ void SetSignalLogRatioHigh(float p); /*! \brief Sets the number of common pairs. * \param p Number of common pairs. */ void SetNumCommonPairs(u_int16_t p); /*! \brief Sets the change. * \param p Change value to set. */ void SetChange(u_int8_t p); /*! \brief Gets a string representation of the detection call. * \return The detection call */ std::string GetDetectionString(); /*! \brief Gets a string representation of the change call. * \return The change call */ std::string GetChangeString(); /*! Assignment operator * \param src The object to copy * \return The copied object */ FusionExpressionProbeSetResults operator=(FusionExpressionProbeSetResults &src); /*! \brief Constructor */ FusionExpressionProbeSetResults(); /*! \brief Destructor */ ~FusionExpressionProbeSetResults(); }; //////////////////////////////////////////////////////////////////// /*! Genotyping analysis probe set results */ class FusionGenotypeProbeSetResults : public FusionProbeSetResults { private: /*! The allele call */ u_int8_t alleleCall; /*! The confidence associated with the allele call */ float confidence; /*! The relative allele strength for the first block */ float RAS1; /*! The relative allele strength for the second block */ float RAS2; /*! The p-value associated with an AA call */ float pValueAA; /*! The p-value associated with an AB call */ float pValueAB; /*! The p-value associated with an BB call */ float pValueBB; /*! The p-value associated with an no call call */ float pValueNoCall; public: void Clear(); /*! \brief The allele call * \return Allele call. 
*/ u_int8_t GetAlleleCall(); /*! \brief The confidence associated with the allele call. * \return Confidence. */ float GetConfidence(); /*! \brief The relative allele strength for the first block. * \return RAS1. */ float GetRAS1(); /*! \brief The relative allele strength for the second block. * \return RAS2. */ float GetRAS2(); /*! \brief The p-value associated with an AA call. * \return pvalue for AA. */ float GetPValueAA(); /*! \brief The p-value associated with an AB call. * \return pvalue for AB. */ float GetPValueAB(); /*! \brief The p-value associated with an BB call. * \return pvalue for BB. */ float GetPValueBB(); /*! \brief The p-value associated with an no call call. * \return pvalue no call. */ float GetPValueNoCall(); /*! \brief Sets the allele call. * \param p Allele call to set. */ void SetAlleleCall(u_int8_t p); /*! \brief The confidence associated with the allele call. * \param p Confidence to value to set. */ void SetConfidence(float p); /*! \brief Sets the relative allele strength for the first block. * \param p RAS1 value to set. */ void SetRAS1(float p); /*! \brief Sets the relative allele strength for the second block. * \param p RAS2 value to set. */ void SetRAS2(float p); /*! \brief Sets the p-value associated with an AA call. * \param p pvalue aa to set. */ void SetPValueAA(float p); /*! \brief Sets the p-value associated with an AB call. * \param p pvalue AB to set. */ void SetPValueAB(float p); /*! \brief Sets the p-value associated with an BB call. * \param p pvalue BB to set. */ void SetPValueBB(float p); /*! \brief Sets the p-value associated with an no call call. * \param p pvalue no call to set. */ void SetPValueNoCall(float p); /*! \brief Gets a string representation of the allele call. * \return Allele call. */ std::string GetAlleleCallString(); /*! \brief Assignment operator * \param src Object to copy * \return Copied object */ FusionGenotypeProbeSetResults operator=(FusionGenotypeProbeSetResults &src); /*! 
\brief Constructor */ FusionGenotypeProbeSetResults(); /*! \brief Destructor */ ~FusionGenotypeProbeSetResults(); }; //////////////////////////////////////////////////////////////////// /*! Universal (tag array) analysis probe set results. */ class FusionUniversalProbeSetResults : public FusionProbeSetResults { private: /*! The background value.*/ float background; public: /*! Gets the background value. * @return The background value. */ float GetBackground() const; /*! Sets the background value. * @param bg The background value. */ void SetBackground(float bg); void Clear(); /*! Assignment operator * @param src The object to copy * @return The copied object */ FusionUniversalProbeSetResults operator=(FusionUniversalProbeSetResults &src); /*! Constructor */ FusionUniversalProbeSetResults(); /*! Destructor */ ~FusionUniversalProbeSetResults(); }; //////////////////////////////////////////////////////////////////// /*! A structure to hold a base call at a given position (index). */ class FusionBaseCallType { protected: /*! The position (index) of the call. */ int32_t position; /*! The call. */ int8_t call; public: /*! Constructor. */ FusionBaseCallType(); /*! Constructor with values. * @param p The position. * @param c The call. */ FusionBaseCallType(int32_t p, int8_t c); /*! Destructor */ ~FusionBaseCallType(); /*! Clears the values. */ void Clear(); /*! Gets the position. * @return The position. */ int32_t GetPosition() const { return position; } /*! Gets the call. * @return The call */ int8_t GetCall() const { return call; } /*! Sets the position * @param p The position. */ void SetPosition(int32_t p) { position = p; } /*! Sets the call. * @param c The call. */ void SetCall(int8_t c) { call = c; } }; /*! A vector of calls. */ typedef std::vector FusionBaseCallVector; //////////////////////////////////////////////////////////////////// /*! A class to hold a force call, its position and reason. 
* * A force call is the call the algorithm would have made if the thresholds * were not applied. */ class FusionForceCallType : public FusionBaseCallType { protected: /*! The reason for the call. Stored as int8_t, while the constructor and the accessors below use u_int8_t. */ int8_t reason; public: /*! \brief Constructor */ FusionForceCallType(); /*! \brief Constructor * \param p Position * \param c Call * \param r Reason */ FusionForceCallType(int32_t p, int8_t c, u_int8_t r); /*! \brief Destructor */ ~FusionForceCallType(); /*! \brief Clears members. */ void Clear(); /*! \brief Gets the reason. * \return Reason. */ u_int8_t GetReason() const { return reason; } /*! \brief Sets the reason. * \param r Reason value to set. */ void SetReason(u_int8_t r) { reason = r; } }; /*! A vector of force calls. */ typedef std::vector FusionForceCallVector; //////////////////////////////////////////////////////////////////// /*! \brief Resequencing results: called bases, their scores, force calls and original (pre-edit) calls. */ class FusionResequencingResults { private: /*! \brief The called bases. */ Int8Vector calledBases; /*! Base call scores. */ FloatVector scores; /*! An array of force calls - base calls the algorithm would have made if the thresholds were removed. */ FusionForceCallVector forceCalls; /*! An array of original calls. The calledBases vector contains the results of the algorithm and user edits. * If a user edits a base, the base originally called by the algorithm is stored in this vector. */ FusionBaseCallVector origCalls; public: /*! Constructor */ FusionResequencingResults(); /*! Destructor */ ~FusionResequencingResults(); /*! Clears the members. */ void Clear(); /*! \brief Gets the called bases. * \return Array of called bases. */ const Int8Vector &GetCalledBases(); /*! \brief Gets the called base at the given index. * \param index Index to the called bases array. * \return Called base. */ int8_t GetCalledBase(int index); /*! \brief Gets the size of the called bases array. * \return Size of the called bases array. */ int32_t GetCalledBasesSize() const; /*! \brief Resizes the called bases array.
* \param size Size of the array. */ void ResizeCalledBases(int32_t size); /*! \brief Sets the called base. * \param index Index to the array. * \param call Call. */ void SetCalledBase(int32_t index, int8_t call); /*! \brief Adds the called base to the end of the list. * \param call Call. */ void AddCalledBase(int8_t call); /*! \brief Gets the scores. * \return Array of scores. */ const FloatVector& GetScores(); /*! \brief Gets the score at the given index. * \param index Index to the scores array. * \return Score. */ float GetScore(int32_t index); /*! \brief Gets the size of the scores array. * \return Size of the scores array. */ int32_t GetScoresSize() const; /*! \brief Resizes the scores array. * \param size Size of the array. */ void ResizeScores(int32_t size); /*! \brief Sets the score. * \param index Index to the array. * \param score Score. */ void SetScore(int index, float score); /*! \brief Adds the score to the end of the list. * \param score Score. */ void AddScore(float score); /*! \brief Gets the force calls. * \return Array of force calls. */ const FusionForceCallVector& GetForceCalls(); /*! \brief Gets the force call at the given index. * \param index Index to the force calls array. * \return Force call (returned by value). */ FusionForceCallType GetForceCall(int32_t index); /*! \brief Gets the size of the force calls array. * \return Size of the force calls array. */ int32_t GetForceCallsSize() const; /*! \brief Resizes the force calls array. * \param size Size of the array. */ void ResizeForceCalls(int32_t size); /*! \brief Sets the force call. * \param index Index to the array. * \param call Force call. */ void SetForceCall(int32_t index, FusionForceCallType call); /*! \brief Adds the force call to the end of the list. * \param call Force call. */ void AddForceCall(FusionForceCallType call); /*! \brief Gets the original called bases. * \return Array of original calls. */ const FusionBaseCallVector& GetOrigCalls(); /*!
\brief Gets the original called base at the given index. * \param index Index to the original calls array. * \return Original call (returned by value). */ FusionBaseCallType GetOrigCall(int32_t index); /*! \brief Gets the size of the original calls array. * \return Size of the original calls array. */ int32_t GetOrigCallsSize() const; /*! \brief Resizes the original calls array. * \param size Size of the array. */ void ResizeOrigCalls(int32_t size); /*! \brief Sets the original call. * \param index Index to the array. * \param call Original call. */ void SetOrigCall(int32_t index, FusionBaseCallType call); /*! \brief Adds the original call to the end of the list. * \param call Original call. */ void AddOrigCall(FusionBaseCallType call); }; //////////////////////////////////////////////////////////////////// } // namespace #endif // !defined(_FusionProbeSetResults_HEADER_) affxparser/src/fusion/calvin_files/fusion/src/FusionSMDData.cpp0000644000175200017520000000166414516003651025674 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/FusionSMDData.h" // using namespace affymetrix_fusion_io; affxparser/src/fusion/calvin_files/fusion/src/FusionSMDData.h0000644000175200017520000000225514516003651025336 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionSMDData_HEADER_ #define _AffymetrixFusionSMDData_HEADER_ /*! \file FusionSMDData.h This file defines the Fusion SMD Data classes */ #include "file/SMDFileData.h" // using namespace affxsmd; namespace affymetrix_fusion_io { /*! Storage for the SMD file. Derives from SMDFileData and declares no members of its own, so it only re-exposes that type inside the affymetrix_fusion_io namespace. */ class FusionSMDFile : public SMDFileData { }; } #endif affxparser/src/fusion/calvin_files/fusion/src/FusionTagValuePairType.h0000644000175200017520000000437114516003651027310 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixFusionTagValuePairType_HEADER_ #define _AffymetrixFusionTagValuePairType_HEADER_ /*! \file FusionTagValuePairType.h This file defines a fusion tag-value pair class */ #include "calvin_files/parameter/src/ParameterNameValueType.h" // #include #include #include // namespace affymetrix_fusion_io { /*! This is a tag (name) - value pair class*/ class FusionTagValuePairType { public: /*! Name of the item */ std::wstring Tag; /*! Value of the item in string form */ std::wstring Value; /*! Assignment operator */ FusionTagValuePairType operator=(const FusionTagValuePairType vp) { Tag = vp.Tag; Value = vp.Value; nvt = vp.nvt; return *this; } /*! Equality operator */ bool operator==(const FusionTagValuePairType vp) { if (vp.Tag == Tag) return true; return false; } /*! Equality operator * @param tag Name of the tag * @return True if the tag argument matches the object tag */ bool operator==(const wchar_t *tag) { if (Tag == tag) return true; return false; } /*! Get the Calvin ParameterNameValueType object * @return Calvin ParameterNameValueType */ affymetrix_calvin_parameter::ParameterNameValueType& DetailedType() { return nvt; } private: /*! 
Embbedded Calvin parameter type object */ affymetrix_calvin_parameter::ParameterNameValueType nvt; }; /*! FusionTagValuePairType list */ typedef std::list FusionTagValuePairTypeList; } #endif affxparser/src/fusion/calvin_files/fusion/src/GCOSAdapter/0000755000175200017520000000000014516022540024612 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/fusion/src/GCOSAdapter/GCOSCELDataAdapter.cpp0000644000175200017520000001673414516003651030505 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/GCOSAdapter/GCOSCELDataAdapter.h" // #include "calvin_files/utils/src/StringUtils.h" // using namespace affxcel; using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_parameter; /* * Constructor */ GCOSCELDataAdapter::GCOSCELDataAdapter() { } /* * Destructor: clears the wrapped GCOS CEL object */ GCOSCELDataAdapter::~GCOSCELDataAdapter() { gcosCel.Clear(); } /* * Can this object read the file: tries to read the header through the wrapped GCOS reader, closes the file again and returns the result of the header read */ bool GCOSCELDataAdapter::CanReadFile() { bool canRead = gcosCel.ReadHeader(); gcosCel.Close(); return canRead; } /* * Passes the file name to the wrapped GCOS CEL object */ void GCOSCELDataAdapter::SetFileName(const std::string& value) { gcosCel.SetFileName(value.c_str()); } /* * Returns the file name held by the wrapped GCOS CEL object */ std::string GCOSCELDataAdapter::GetFileName() const { return gcosCel.GetFileName(); } /* * Converts the wide-character message with ConvertWCSToMBS and stores it on the wrapped GCOS CEL object */ void GCOSCELDataAdapter::SetError(const wchar_t* value) { std::string error = StringUtils::ConvertWCSToMBS(value); gcosCel.SetError(error.c_str()); } /* * Returns the error message of the wrapped GCOS CEL object, converted with ConvertMBSToWCS */ std::wstring GCOSCELDataAdapter::GetError() { return StringUtils::ConvertMBSToWCS(gcosCel.GetError()); } /* * Looks up a header key on the wrapped GCOS CEL object; the key and the result are converted between wide and multi-byte strings */ std::wstring GCOSCELDataAdapter::GetHeaderKey(const wchar_t* key) { std::string keyString = StringUtils::ConvertWCSToMBS(key); return StringUtils::ConvertMBSToWCS(gcosCel.GetHeaderKey(keyString.c_str())); } /* * Returns the GCOS header string, converted with ConvertMBSToWCS */ std::wstring GCOSCELDataAdapter::GetHeader() { return StringUtils::ConvertMBSToWCS(gcosCel.GetHeaderString()); } /* * Returns the GCOS algorithm name, converted with ConvertMBSToWCS */ std::wstring GCOSCELDataAdapter::GetAlg() { return StringUtils::ConvertMBSToWCS(gcosCel.GetAlg()); } /* * Not implemented for GCOS files: always returns an empty string */ std::wstring GCOSCELDataAdapter::GetAlgVer() { std::wstring emptyStr; // to be implemented if needed return emptyStr; } /* * Returns the GCOS parameter string, converted with ConvertMBSToWCS */ std::wstring GCOSCELDataAdapter::GetParams() { return
StringUtils::ConvertMBSToWCS(gcosCel.GetParams()); } /* * Returns the algorithm parameter value for a tag; the tag and the result are converted between wide and multi-byte strings */ std::wstring GCOSCELDataAdapter::GetAlgorithmParameter(const wchar_t *tag) { std::string tagString = StringUtils::ConvertWCSToMBS(tag); return StringUtils::ConvertMBSToWCS(gcosCel.GetAlgorithmParameter(tagString.c_str())); } /* * Returns the tag of the algorithm parameter at the given index as a wide string */ std::wstring GCOSCELDataAdapter::GetAlgorithmParameterTag(int index) { return StringUtils::ConvertMBSToWCS(gcosCel.GetAlgorithmParameterTag(index)); } /* * Returns the algorithm parameters string of the wrapped GCOS CEL object as a wide string */ std::wstring GCOSCELDataAdapter::GetAlgorithmParameters() { return StringUtils::ConvertMBSToWCS(gcosCel.GetAlgorithmParameters()); } /* * NOTE(review): this copy of the file is truncated inside the loop below - the text between the loop condition and the parameter list of GetIntensities is missing. Restore it from the pristine source before changing any code in this region. */ void GCOSCELDataAdapter::GetParameters(FusionTagValuePairTypeList& values) { FusionTagValuePairType tvp; int count = gcosCel.GetNumberAlgorithmParameters(); for (int i=0; i& intensities) { return gcosCel.GetIntensities(index,intensities); } /* * Forwards to gcosCel.GetIntensity(x, y) */ float GCOSCELDataAdapter::GetIntensity(int x, int y) { return gcosCel.GetIntensity(x, y); } /* * Forwards to gcosCel.GetStdv(index) */ float GCOSCELDataAdapter::GetStdv(int index) { return gcosCel.GetStdv(index); } /* * Forwards to gcosCel.GetStdv(x, y) */ float GCOSCELDataAdapter::GetStdv(int x, int y) { return gcosCel.GetStdv(x, y); } /* * Forwards to gcosCel.GetPixels(index) */ short GCOSCELDataAdapter::GetPixels(int index) { return gcosCel.GetPixels(index); } /* * Forwards to gcosCel.GetPixels(x, y) */ short GCOSCELDataAdapter::GetPixels(int x, int y) { return gcosCel.GetPixels(x, y); } // Accessors for the mask/outlier flags /* */ bool GCOSCELDataAdapter::IsMasked(int x, int y) { return gcosCel.IsMasked(x, y); } /* */ bool GCOSCELDataAdapter::IsMasked(int index) { return gcosCel.IsMasked(index); } /* */ bool GCOSCELDataAdapter::IsOutlier(int x, int y) { return gcosCel.IsOutlier(x, y); } /* */ bool GCOSCELDataAdapter::IsOutlier(int index) { return gcosCel.IsOutlier(index); } // For reading a file.
/* * Closes the wrapped GCOS CEL object */ void GCOSCELDataAdapter::Close() { gcosCel.Close(); } /* * Reads the header through the wrapped GCOS CEL object and returns its result */ bool GCOSCELDataAdapter::ReadHeader() { return gcosCel.ReadHeader(); } /* * Reads the file through the wrapped GCOS CEL object, passing the mask/outlier flag on unchanged */ bool GCOSCELDataAdapter::Read(bool bIncludeMaskAndOutlier) { return gcosCel.Read(bIncludeMaskAndOutlier); } /* * Clears the wrapped GCOS CEL object */ void GCOSCELDataAdapter::Clear() { gcosCel.Clear(); } /* */ //void GCOSCELDataAdapter::SetAlgorithmName(const wchar_t *str) //{ // std::string name = StringUtils::ConvertWCSToMBS(str); // gcosCel.SetAlgorithmName(name.c_str()); //} /* */ //void GCOSCELDataAdapter::AddAlgorithmParameter(const wchar_t *tag, const wchar_t *value) //{ // std::string tagString = StringUtils::ConvertWCSToMBS(tag); // std::string valueString = StringUtils::ConvertWCSToMBS(value); // gcosCel.AddAlgorithmParameter(tagString.c_str(), valueString.c_str()); //} /* */ //void GCOSCELDataAdapter::SetChipType(const wchar_t *str) //{ // std::string type = StringUtils::ConvertWCSToMBS(str); // gcosCel.SetChipType(type.c_str()); //} /* * Copies the Intensity, Stdv and Pixels fields from the GCOS entry to the Calvin entry */ void GCOSCELDataAdapter::CopyGCOSEntryToCalvin(const CELFileEntryType& source, FusionCELFileEntryType& target) { target.Intensity = source.Intensity; target.Stdv = source.Stdv; target.Pixels = source.Pixels; } /* * Returns the list of parameters associated with a data set, empty for GCOS files (setName is not used) */ ParameterNameValueTypeList GCOSCELDataAdapter::GetDataSetParameters(const std::wstring &setName) { ParameterNameValueTypeList params; return params; } affxparser/src/fusion/calvin_files/fusion/src/GCOSAdapter/GCOSCELDataAdapter.h0000644000175200017520000002630514516003651030145 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixGCOSCELDataAdapter_HEADER_ #define _AffymetrixGCOSCELDataAdapter_HEADER_ /*! \file GCOSCELDataAdapter.h This file defines the GCOS Fusion CEL Data adapter classes */ #include "calvin_files/fusion/src/FusionCELDataAdapterInterface.h" #include "calvin_files/utils/src/StringUtils.h" // #include "file/CELFileData.h" // #include #include // namespace affymetrix_fusion_io { //////////////////////////////////////////////////////////////////// /*! \brief GCOS Adapter that wraps GCOS cell file reader for Fusion and contains the interfaces to the data.*/ class GCOSCELDataAdapter : public IFusionCELDataAdapter { public: /*! \brief Constructor */ GCOSCELDataAdapter(); /*! \brief Destructor */ virtual ~GCOSCELDataAdapter(); public: /*! \brief Can this object read the file * \return If the cell file can be read. */ bool CanReadFile(); /*! \brief Set the cell file name. * \param value The cell file name to be set */ void SetFileName(const std::string& value); /*! \brief Get the cell file name. * \return The currently set cell file name. */ std::string GetFileName() const; /*! \brief Get the id of the file (only valid for Command Console "calvin" files) * \return The unique file id; always an empty string for this GCOS adapter. */ affymetrix_calvin_utilities::AffymetrixGuidType GetFileId() { return ""; } /*! \brief Set the error message. * \param value The error message to be set. */ void SetError(const wchar_t* value); /*! \brief Get the currently set error message string.
* \return The error message. */ std::wstring GetError(); /*! \brief Get the header key. * \param key The key handed to the underlying GCOS reader. * \return The header key. */ std::wstring GetHeaderKey(const wchar_t* key); /*! \brief Get the version of the cell file. * \return The cell file version. */ int GetVersion() { return gcosCel.GetVersion(); } /*! \brief Get the number of columns. * \return The number of columns */ int GetCols() { return gcosCel.GetCols(); } /*! \brief Get the number of rows. * \return The number of rows. */ int GetRows() { return gcosCel.GetRows(); } /*! \brief Get number of cells * \return The number of cells */ int GetNumCells() { return gcosCel.GetNumCells(); } /*! \brief Get the header string. * \return The header as a string. */ std::wstring GetHeader(); /*! \brief Get the algorithm name. * \return The algorithm name. */ std::wstring GetAlg(); /*! \brief Get the algorithm version. * \return The algorithm version. */ std::wstring GetAlgVer(); /*! \brief Get the parameters. * \return The parameters used for creating the cell file. */ std::wstring GetParams(); /*! \brief Get a parameter. * \param tag Parameter name associated with a parameter value. * \return The parameter value. */ std::wstring GetAlgorithmParameter(const wchar_t *tag); /*! \brief Get a parameter tag (name). * \param index The index to the parameter. * \return The parameter tag. */ std::wstring GetAlgorithmParameterTag(int index); /*! \brief Get number of parameters. * \return The number of parameters. */ int GetNumberAlgorithmParameters() { return gcosCel.GetNumberAlgorithmParameters(); } /*! \brief Get the algorithm parameters as a single string. * \return The parameters. */ std::wstring GetAlgorithmParameters(); /*! \brief Get parameters. * \param values Collection of name/value type parameter list. */ void GetParameters(FusionTagValuePairTypeList& values); /*! \brief Get the DatHeader string. * \return The DatHeader string. */ std::wstring GetDatHeader() { return affymetrix_calvin_utilities::StringUtils::ConvertMBSToWCS(gcosCel.GetDatHeader()); } /*!
\brief Get chip type. * \return The chip type of the cell. */ std::wstring GetChipType(); /*! \brief Get the library package * \return blank value */ std::wstring GetLibraryPackageName() { return L""; } /*! \brief Get the master file * \return blank value */ std::wstring GetMasterFileName() { return L""; } /*! \brief Get cell margin. \return The cell margin. */ int GetCellMargin() { return gcosCel.GetCellMargin(); } /*! \brief Get number of outliers. * \return The number of outliers. */ unsigned int GetNumOutliers() { return gcosCel.GetNumOutliers(); } /*! \brief Get number of masked cells. * \return The number of masked cells. */ unsigned int GetNumMasked() { return gcosCel.GetNumMasked(); } /*! \brief Get the grid coordinates. * \return Returns the grid coordinates. */ FGridCoords GetGridCorners(); // Index/position conversions /*! \brief Translate index to X. * \param index The index to translate for x. * \return The translated index to x value. */ int IndexToX(int index); /*! \brief Translate index to Y. * \param index The index to translate for y. * \return The translated index to y value. */ int IndexToY(int index); /*! \brief Translate X and Y to an index. * \param x X coordinate. * \param y Y coordinate. * \return The translated index from x and y. */ int XYToIndex(int x, int y); // Accessors for intensity information. /*! \brief Get entry by index. * \param index Entry index. * \param entry Entry to be filled from index. */ void GetEntry(int index, FusionCELFileEntryType &entry); /*! \brief Get entry by x and y. * \param x X position. * \param y Y position. * \param entry Entry to be filled from x and y. */ void GetEntry(int x, int y, FusionCELFileEntryType &entry); /*! \brief Get intensity by index position. * \param index Location of intensity * \return The intensity value. */ float GetIntensity(int index); /// @brief a vector of intensities /// @param index index of first intensity /// @param intensities vector to fill /// @return non-zero on error.
int GetIntensities(int index,std::vector& intensities); /*! \brief Get intensity by x, y position. * \param x X position. * \param y Y position. * \return The intensity value. */ float GetIntensity(int x, int y); /*! \brief Get standard deviation by index position. * \param index Location of stdv. * \return The standard deviation value. */ float GetStdv(int index); /*! \brief Get standard deviation by x, y position. * \param x X position. * \param y Y position. * \return The standard deviation value. */ float GetStdv(int x, int y); /*! \brief Get pixel by index position. * \param index Location of pixel. * \return The pixel value. */ short GetPixels(int index); /*! \brief Get pixel x, y position. * \param x X position. * \param y Y position. * \return The pixel value. */ short GetPixels(int x, int y); // Accessors for the mask/outlier flags /*! \brief Check if masked by x, y position. * \param x X position. * \param y Y position. * \return Is the x, y position masked. */ bool IsMasked(int x, int y); /*! \brief Check if masked by index position. * \param index Location to check. * \return Is index position masked. */ bool IsMasked(int index); /*! \brief Check if outlier by x, y position. * \param x X position. * \param y Y position. * \return Is the x, y position an outlier. */ bool IsOutlier(int x, int y); /*! \brief Check if outlier by index position. * \param index Location to check. * \return Is index position an outlier. */ bool IsOutlier(int index); // For reading a file. /*! Close the cell file. */ void Close(); /*! \brief Read the header of the cell file. * \return The result reported by the underlying GCOS header read. */ bool ReadHeader(); /*! \brief Read the cell file. * \param bIncludeMaskAndOutlier Flag indicates whether to include in the read, the reading of outliers and masked items. * \return If the read completed successfully. */ bool Read(bool bIncludeMaskAndOutlier); /*! \brief read cell file. * * The state flag is used for GCOS files only. * * \param filename Cell file name to read.
* \param state [=CEL_ALL] Reading state * \return If the read completed successfully. * \a state can be one or combination of the following values:\n\n * CEL_ALL Read all information in file (default)\n * CEL_DATA Read header and intensities only\n * CEL_OUTLIER Read header, intensities and outliers\n * CEL_MASK Read header, intensities and masked cells\n\n */ bool ReadEx(const char *filename, int state) { return gcosCel.ReadEx(filename, state); } /*! \brief Get the reading state * \return The reading state. */ int GetReadState() { return gcosCel.GetReadState(); } /*! \brief clears the members. */ void Clear(); /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData() { return NULL; } /*! Returns the list of parameters associated with a data set, empty for GCOS files * @param setName The data set name * @return The list of parameters */ affymetrix_calvin_parameter::ParameterNameValueTypeList GetDataSetParameters(const std::wstring &setName); /*! Sets the active data group for a multi-group CEL file. Default is the first group. This GCOS adapter ignores the request (empty body). */ void SetActiveDataGroup(const std::wstring &groupName) { }; /*! Is this a multi-color CEL file? * @return True if it is multi-color; always false for this GCOS adapter */ bool IsMultiColor() { return false; } /*! Returns a list of the channel (ie data group) names * @return list of channel names; always empty for this GCOS adapter */ WStringVector GetChannels() { WStringVector empty; return empty; } /*! \brief Sets the name of the algorithm used to create the CEL file. * \param str The algorithm name. */ // virtual void SetAlgorithmName(const wchar_t *str); /*! \brief Add algorithm parameter. * \param tag Parameter name. * \param value Parameter value. */ // virtual void AddAlgorithmParameter(const wchar_t *tag, const wchar_t *value); /*! \brief Set the number of rows and columns. * \param rows Number of rows. * \param cols Number of columns.
*/ // virtual void SetDimensions(int rows, int cols) { gcosCel.SetDimensions(rows, cols); } /*! \brief Set the chip type. * \param str Chip type. */ // virtual void SetChipType(const wchar_t *str); /*! \brief Set the margin. * \param margin Margin value to set. */ // virtual void SetMargin(int margin) { gcosCel.SetMargin(margin); } protected: /*! \brief Convert a GCOS entry type to a Calvin entry type. * \param source GCOS entry type to convert from. * \param target Calvin entry type to convert to. */ static void CopyGCOSEntryToCalvin(const affxcel::CELFileEntryType& source, FusionCELFileEntryType& target); protected: /*! The underlying data access object */ affxcel::CCELFileData gcosCel; }; } #endif //_AffymetrixGCOSCELDataAdapter_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/GCOSAdapter/GCOSCHPDataAdapter.cpp0000644000175200017520000002422514516003651030506 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/fusion/src/GCOSAdapter/GCOSCHPDataAdapter.h" // #include "calvin_files/utils/src/StringUtils.h" // #include "file/TagValuePair.h" // using namespace affxchp; using namespace affymetrix_fusion_io; using namespace affymetrix_calvin_utilities; GCOSCHPHeaderAdapter::GCOSCHPHeaderAdapter(affxchp::CCHPFileData* chp): gcosChp(chp) { } GCOSCHPHeaderAdapter::~GCOSCHPHeaderAdapter() { } int GCOSCHPHeaderAdapter::GetCols() const { return gcosChp->GetHeader().GetCols(); } /*! */ int GCOSCHPHeaderAdapter::GetRows() const { return gcosChp->GetHeader().GetRows(); } /*! */ int GCOSCHPHeaderAdapter::GetNumProbeSets() const { return gcosChp->GetHeader().GetNumProbeSets(); } /*! */ AssayType GCOSCHPHeaderAdapter::GetAssayType() const { AssayType assayFusion = (AssayType)gcosChp->GetHeader().GetAssayType(); return assayFusion; } /*! */ std::wstring GCOSCHPHeaderAdapter::GetChipType() const { return StringUtils::ConvertMBSToWCS(gcosChp->GetHeader().GetChipType()); } /*! */ std::wstring GCOSCHPHeaderAdapter::GetAlgName() const { return StringUtils::ConvertMBSToWCS(gcosChp->GetHeader().GetAlgName()); } /*! */ std::wstring GCOSCHPHeaderAdapter::GetAlgVersion() const { return StringUtils::ConvertMBSToWCS(gcosChp->GetHeader().GetAlgVersion()); } /*! Gets the algorithm parameters * @return The number of feature columns */ void GCOSCHPHeaderAdapter::GetAlgorithmParameters(FusionTagValuePairTypeList& values) { ConvertGCOS(gcosChp->GetHeader().AlgorithmParameters(),values); } /*! 
Gets the algorithm parameter count * @return The number of algorithm parameters */ u_int32_t GCOSCHPHeaderAdapter::GetAlgorithmParameterCount() { TagValuePairTypeList &values = gcosChp->GetHeader().AlgorithmParameters(); return (u_int32_t)values.size(); } /*! Gets the summary parameter count * @return The number of summary parameters */ u_int32_t GCOSCHPHeaderAdapter::GetSummaryParameterCount() { TagValuePairTypeList &values = gcosChp->GetHeader().SummaryParameters(); return (u_int32_t)values.size(); } /*! Gets the summary parameters * @return The summary parameters */ void GCOSCHPHeaderAdapter::GetSummaryParameters(FusionTagValuePairTypeList& values) { ConvertGCOS(gcosChp->GetHeader().SummaryParameters(),values); } /*! Gets the parent CEL file * @return The parent CEL file */ std::wstring GCOSCHPHeaderAdapter::GetParentCellFile() const { return StringUtils::ConvertMBSToWCS(gcosChp->GetHeader().GetParentCellFile()); } /*! Gets the prog ID * @return The prog ID */ std::wstring GCOSCHPHeaderAdapter::GetProgID() const { return StringUtils::ConvertMBSToWCS(gcosChp->GetHeader().GetProgID()); } /*! Gets a specific algorithm parameter given a name/tag * @return The specific algorithm parameter given a name/tag */ std::wstring GCOSCHPHeaderAdapter::GetAlgorithmParameter(const wchar_t *tag) { std::string hdr = StringUtils::ConvertWCSToMBS(tag); return StringUtils::ConvertMBSToWCS(gcosChp->GetHeader().GetAlgorithmParameter(hdr.c_str())); } /*! Gets a specific summary parameter given a name/tag * @return The specific summary parameter given a name/tag */ std::wstring GCOSCHPHeaderAdapter::GetSummaryParameter(const wchar_t *tag) { std::string hdr = StringUtils::ConvertWCSToMBS(tag); return StringUtils::ConvertMBSToWCS(gcosChp->GetHeader().GetSummaryParameter(hdr.c_str())); } /*! 
Gets the background zone information * @return The background zone information */ void GCOSCHPHeaderAdapter::GetBackgroundZoneInfo(BackgroundZoneInfo& info) { info = gcosChp->GetHeader().GetBackgroundZoneInfo(); } /*! Gets the list of background zone positions and values * @return The list of background zone positions and values */ void GCOSCHPHeaderAdapter::GetBackgroundZones(BackgroundZoneTypeList& zones) { zones = gcosChp->GetHeader().GetBackgroundZones(); } /*! Gets the background value for a given center coordinate * @return The background value for a given center coordinate */ void GCOSCHPHeaderAdapter::GetBackgroundZone(BackgroundZoneType& type, int x, int y) { type = gcosChp->GetHeader().GetBackgroundZone(x,y); } /*! Gets the magic number * @return The magic number */ int GCOSCHPHeaderAdapter::GetMagic() const { return gcosChp->GetHeader().GetMagicNumber(); } /*! Gets the version number * @return The version number */ int GCOSCHPHeaderAdapter::GetVersion() const { return gcosChp->GetHeader().GetVersionNumber(); } ////////////////////////////// // data section /* * Constructor */ GCOSCHPDataAdapter::GCOSCHPDataAdapter() { header = new GCOSCHPHeaderAdapter(&gcosChp); } /* * Destructor */ GCOSCHPDataAdapter::~GCOSCHPDataAdapter() { gcosChp.Clear(); delete header; } /*! Accessors to header. * @return The header data object */ IFusionCHPHeaderAdapter& GCOSCHPDataAdapter::GetHeader() { return *header; } /*! Returns the expression probe set result * @param index The index to the result object of interest. * @param result The expression result. * @return True if the expression result was found. 
*/ bool GCOSCHPDataAdapter::GetExpressionResults(int index, FusionExpressionProbeSetResults& result) { CExpressionProbeSetResults* ps = 0; ps = gcosChp.GetExpressionResults(index); if (ps) { result.SetDetectionPValue(ps->DetectionPValue); result.SetSignal(ps->Signal); result.SetNumPairs(ps->NumPairs); result.SetNumUsedPairs(ps->NumUsedPairs); result.SetDetection(ps->Detection); result.SetHasCompResults(ps->m_HasCompResults); result.SetChangePValue(ps->ChangePValue); result.SetSignalLogRatio(ps->SignalLogRatio); result.SetSignalLogRatioLow(ps->SignalLogRatioLow); result.SetSignalLogRatioHigh(ps->SignalLogRatioHigh); result.SetNumCommonPairs(ps->NumCommonPairs); result.SetChange(ps->Change); return true; } return false; } /*! Returns the genotyping probe set result * @param index The index to the result object of interest. * @param result The genotyping result. * @return True if the genotyping result was found. */ bool GCOSCHPDataAdapter::GetGenotypingResults(int index, FusionGenotypeProbeSetResults& result) { CGenotypeProbeSetResults* ps = 0; ps = gcosChp.GetGenotypingResults(index); if (ps) { result.SetAlleleCall(ps->AlleleCall); result.SetConfidence(ps->Confidence); result.SetRAS1(ps->RAS1); result.SetRAS2(ps->RAS2); result.SetPValueAA(ps->pvalue_AA); result.SetPValueAB(ps->pvalue_AB); result.SetPValueBB(ps->pvalue_BB); result.SetPValueNoCall(ps->pvalue_NoCall); return true; } return false; } /*! Returns the universal (tag array) probe set result * @param index The index to the result object of interest. * @param The universal result. * @return True if the universal result was found. 
*/ bool GCOSCHPDataAdapter::GetUniversalResults(int index, FusionUniversalProbeSetResults& result) { CUniversalProbeSetResults* ps = 0; ps = gcosChp.GetUniversalResults(index); if (ps) { result.SetBackground(ps->GetBackground()); return true; } return false; } bool GCOSCHPDataAdapter::GetResequencingResults(FusionResequencingResults& result) { CResequencingResults* ps = 0; ps = gcosChp.GetResequencingResults(); if (ps) { int32_t sz = ps->GetCalledBasesSize(); for(int i = 0; i < sz; i++) { result.AddCalledBase(ps->GetCalledBase(i)); } sz = ps->GetScoresSize(); for(int i = 0; i < sz; i++) { result.AddScore(ps->GetScore(i)); } sz = ps->GetForceCallsSize(); for(int i = 0; i < sz; i++) { ForceCallType f = ps->GetForceCall(i); FusionForceCallType fusionType(f.position, f.call, f.reason); result.AddForceCall(fusionType); } sz = ps->GetOrigCallsSize(); for(int i = 0; i < sz; i++) { BaseCallType b = ps->GetOrigCall(i); FusionBaseCallType fusionType(b.position, b.call); result.AddOrigCall(fusionType); } return true; } return false; } // Functions to read file. bool GCOSCHPDataAdapter::Read() { return gcosChp.Read(); } /*! Reads the header of the CHP file * @return True if successful */ bool GCOSCHPDataAdapter::ReadHeader() { return gcosChp.ReadHeader(); } /*! Sets the file name. * @param name The full path to the CHP file */ void GCOSCHPDataAdapter::SetFileName(const std::string& value) { gcosChp.SetFileName(value.c_str()); } /*! Gets the file name. * @return The full path to the CHP file. */ std::string GCOSCHPDataAdapter::GetFileName() const { return gcosChp.GetFileName(); } /*! 
Deallocates any memory used by the class object */ void GCOSCHPDataAdapter::Clear() { gcosChp.Clear(); } /* * Can this object read the file */ bool GCOSCHPDataAdapter::CanReadFile() { bool canRead = gcosChp.ReadHeader(); gcosChp.Clear(); return canRead; } void affymetrix_fusion_io::ConvertFusion(FusionTagValuePairTypeList& fromList, TagValuePairTypeList& toList) { FusionTagValuePairTypeList::iterator begin = fromList.begin(); FusionTagValuePairTypeList::iterator end = fromList.end(); for(; begin != end; begin++) { TagValuePairType type; type.Tag = StringUtils::ConvertWCSToMBS(begin->Tag); type.Value = StringUtils::ConvertWCSToMBS(begin->Value); toList.push_back(type); } } void affymetrix_fusion_io::ConvertGCOS(TagValuePairTypeList& fromList, FusionTagValuePairTypeList& toList) { TagValuePairTypeList::iterator begin = fromList.begin(); TagValuePairTypeList::iterator end = fromList.end(); for(; begin != end; begin++) { FusionTagValuePairType type; type.Tag = StringUtils::ConvertMBSToWCS(begin->Tag); type.Value = StringUtils::ConvertMBSToWCS(begin->Value); toList.push_back(type); } } affxparser/src/fusion/calvin_files/fusion/src/GCOSAdapter/GCOSCHPDataAdapter.h0000644000175200017520000001710714516003651030154 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixGCOSCHPDataAdapter_HEADER_ #define _AffymetrixGCOSCHPDataAdapter_HEADER_ /*! \file GCOSCHPDataAdapter.h This file defines the GCOS Fusion CHP Data adapter classes */ #include "calvin_files/fusion/src/FusionCHPDataAdapterInterface.h" // #include "file/CHPFileData.h" // path required to distinguish between Calvin and GCOS files. // #include #include // using namespace affxchp; namespace affymetrix_fusion_io { /*! \brief Header class for the GCOS CHP adapter */ class GCOSCHPHeaderAdapter : public IFusionCHPHeaderAdapter { public: /*! \brief Constructor */ GCOSCHPHeaderAdapter(affxchp::CCHPFileData* chp); /*! \brief Destructor */ ~GCOSCHPHeaderAdapter(); public: /*! \brief Get numbers columns. * \return Number of columns. */ virtual int GetCols() const; /*! \brief Get number of rows. * \return Number of rows. */ virtual int GetRows() const; /*! \brief Get the number of probesets. * \return Number of probesets. */ virtual int GetNumProbeSets() const; /*! \brief Get assay type. * \return Assay Type. * * \a Assay Types * FusionExpression - Expression assay * FusionGenotyping - Genotyping assay * FusionResequencing - Resequencing assay * FusionUniversal - Tag (universal) assay * FusionUnknown - Unknown assay type */ virtual AssayType GetAssayType() const; /*! \brief Get chip type. * \return The chip type. */ virtual std::wstring GetChipType() const; /*! \brief Get algorithm name. * \return Algorithm name. */ virtual std::wstring GetAlgName() const; /*! \brief Get algorithm version. * \return Algorithm version number. */ virtual std::wstring GetAlgVersion() const; /*! \brief Gets the algorithm parameters. * \param values Name/Value type list to be filled. 
*/
    virtual void GetAlgorithmParameters(FusionTagValuePairTypeList& values);

    /*! \brief Gets the algorithm parameter count.
     * \return Number of algorithm parameters.
     */
    virtual u_int32_t GetAlgorithmParameterCount();

    /*! \brief Gets the summary parameter count.
     * \return Number of summary parameters.
     */
    virtual u_int32_t GetSummaryParameterCount();

    /*! \brief Gets summary parameters.
     * \param values Name/Value type list to be filled.
     */
    virtual void GetSummaryParameters(FusionTagValuePairTypeList& values);

    /*! \brief Gets the parent CEL file.
     * \return Parent CEL file name.
     */
    virtual std::wstring GetParentCellFile() const;

    /*! \brief Gets the prog ID.
     * \return Prog ID
     */
    virtual std::wstring GetProgID() const;

    /*! \brief Gets a specific algorithm parameter given a name/tag.
     * \param tag Parameter name.
     * \return Specific algorithm parameter given a name/tag
     */
    virtual std::wstring GetAlgorithmParameter(const wchar_t *tag);

    /*! \brief Gets a specific summary parameter given a name/tag.
     * \param tag Parameter name.
     * \return Specific summary parameter given a name/tag.
     */
    virtual std::wstring GetSummaryParameter(const wchar_t *tag);

    /*! \brief Gets the background zone information.
     * \param info Background zone information to be filled.
     */
    virtual void GetBackgroundZoneInfo(BackgroundZoneInfo& info);

    /*! \brief Gets the list of background zone positions and values.
     * \param zones List of background zone positions and values to be filled.
     */
    virtual void GetBackgroundZones(BackgroundZoneTypeList& zones);

    /*! \brief Gets the background value for a given center coordinate.
     * \param type Background value for a given center coordinate to be filled.
     * \param x X position of zone.
     * \param y Y position of zone.
     */
    virtual void GetBackgroundZone(BackgroundZoneType& type, int x, int y);

    /*! \brief Gets the magic number.
     * \return Magic number.
     */
    virtual int GetMagic() const;

    /*! \brief Gets the version number.
* \return Version number */ virtual int GetVersion() const; private: /*! A pointer to the root data access object */ affxchp::CCHPFileData* gcosChp; }; /*! \brief Data class for the GCOS CHP adapter */ class GCOSCHPDataAdapter : public IFusionCHPDataAdapter { public: /*! \brief Constructor */ GCOSCHPDataAdapter(); /*! \brief Destructor */ virtual ~GCOSCHPDataAdapter(); /*! \brief Accessors to header. * \return Header object */ virtual IFusionCHPHeaderAdapter& GetHeader(); /*! \brief Can this object read the file. * \return If the cell file can be read. */ virtual bool CanReadFile(); /*! Get the probe set name (only valid for Command Console "calvin" files) * @param index The index to the result object of interest. * @return The probe set name. */ virtual std::string GetProbeSetName(int index) { return ""; } /*! \brief Returns the expression probe set result * \param index Index to the result object of interest. * \param result Expression result. * \return True if the expression result was found. */ virtual bool GetExpressionResults(int index, FusionExpressionProbeSetResults& result); /*! \brief Returns the genotyping probe set result * \param index Index to the result object of interest. * \param result Genotyping result. * \return True if the genotyping result was found. */ virtual bool GetGenotypingResults(int index, FusionGenotypeProbeSetResults& result); /*! \brief Returns the universal (tag array) probe set result * \param index Index to the result object of interest. * \param result Universal result. * \return True if the universal result was found. */ virtual bool GetUniversalResults(int index, FusionUniversalProbeSetResults& result); /*! \brief Gets resequencing results. * \param results Hold the resequencing results. * \return True if resequencing results were retrieved. */ virtual bool GetResequencingResults(FusionResequencingResults& results); /*! \brief Functions to read file. * \return True if the cell file was read. */ virtual bool Read(); /*! 
\brief Reads the header of the CHP file. * \return True if successful */ virtual bool ReadHeader(); /*! \brief Sets the file name. * \param value Full path to the CHP file */ virtual void SetFileName(const std::string& value); /*! \brief Gets the file name. * \return Full path to the CHP file. */ virtual std::string GetFileName() const; /*! \brief Deallocates any memory used by the class object. */ virtual void Clear(); /*! Get the id of the file (only valid for Command Console "calvin" files) * @return The unique file id. */ affymetrix_calvin_utilities::AffymetrixGuidType FileId() { return ""; } /*! Returns the GenericData object associated with a Calvin file, NULL for GCOS files. */ affymetrix_calvin_io::GenericData *GetGenericData() { return NULL; } protected: /*! The underlying data access object */ affxchp::CCHPFileData gcosChp; /*! Header adapter */ GCOSCHPHeaderAdapter* header; }; } #endif // _AffymetrixGCOSCHPDataAdapter_HEADER_ affxparser/src/fusion/calvin_files/fusion/src/fusionexception.h0000644000175200017520000000263514516003651026161 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FusionException_HEADER_ #define _FusionException_HEADER_ #include "calvin_files/exception/src/ExceptionBase.h" // /*! \file fusionexception.h This file defines exceptions from the fusion layer. */ namespace affymetrix_fusion_exceptions { /*! System did not recognize the requested fusion writer */ class UnknownFusionWriterException : public affymetrix_calvin_exceptions::CalvinException { }; /*! System did not recognize the requested fusion DAT data class */ class UnknownFusionDatDataException : public affymetrix_calvin_exceptions::CalvinException { }; } #endif affxparser/src/fusion/calvin_files/parameter/0000755000175200017520000000000014516003651022446 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/parameter/src/0000755000175200017520000000000014516022540023233 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/parameter/src/AffymetrixParameterConsts.h0000644000175200017520000003323514516003651030565 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixParameterConsts_HEADER_ #define _AffymetrixParameterConsts_HEADER_ /*! \file AffymetrixParameterConsts.h This file defines constant names for static attributes. */ namespace affymetrix_calvin_parameter { /*! Defines the static attribute name for the probe array type of the physical array. */ #define ARRAY_TYPE_PARAM_NAME L"affymetrix-array-type" /*! Defines the static attribute name for the master file. */ #define MASTER_FILE_PARAM_NAME L"affymetrix-master-file" /*! Defines the static attribute name for the library package. */ #define LIBRARY_PACKAGE_PARAM_NAME L"affymetrix-library-package" /*! Defines the number of characters to reserve in the parameter list for the array type name */ #define ARRAY_TYPE_MAX_LEN 100 /*! Defines the number of characters to reserce in the parameter list for the array barcode. */ #define ARRAY_BARCODE_MAX_LEN 50 /*! Defines the static attribute name for the barcode of the physical array. */ #define ARRAY_BARCODE_PARAM_NAME L"affymetrix-array-barcode" /*! Defines the static attribute name for the array lot number of the physical array. */ #define ARRAY_LOT_PARAM_NAME L"affymetrix-array-lot" /*! Defines the static attribute name for the expiration date of the physical array. */ #define ARRAY_EXPIRE_DATE_PARAM_NAME L"affymetrix-array-expiration-date" /*! Defines the static attribute name for the barcode of the array plate. */ #define PLATE_BARCODE_PARAM_NAME L"affymetrix-plate-barcode" /*! Defines the static attribute name for the plate type of the array plate. */ #define PLATE_TYPE_PARAM_NAME L"affymetrix-plate-type" /*! Defines the static attribute name for the row location of the array plate well. 
*/
#define PLATE_WELL_ROW_PARAM_NAME L"affymetrix-plate-well-row"

/*! Defines the static attribute name for the column location of the array plate well. */
#define PLATE_WELL_COL_PARAM_NAME L"affymetrix-plate-well-col"

/*! Defines the static attribute name for the exposure time for the plate. */
#define PLATE_EXPOSURE_TIME L"affymetrix-plate-exposuretime"

/*! Defines the static attribute name for the barcode of the plate.
 *  Expands to the same string as PLATE_BARCODE_PARAM_NAME.
 */
#define PLATE_BARCODE L"affymetrix-plate-barcode"

/*! Defines the static attribute name for the well position of the plate. */
#define PLATE_PEG_WELL_POSITION L"affymetrix-plate-peg-wellposition"

/*! Defines the static attribute name for the index of the subimage of the array plate. */
#define PLATE_PEG_SUBIMAGE_INDEX L"affymetrix-plate-peg-subimage-index"

/*! Defines the static attribute name for the GCOS datFileName. */
#define PLATE_GCOS_DATFILENAME L"affymetrix-plate-gcos-datfilename"

/*! Defines the static attribute name for the plate GUID. */
#define PLATE_GUID L"affymetrix-plate-GUID"

/*! Defines the static attribute name for the plate peg GUID. */
#define PLATE_PLATE_PEG_GUID L"affymetrix-plate-peg-GUID"

/*! Defines the prefix for scan parameter names. */
#define AFFY_SCAN_PARAMETER_PREFIX L"affymetrix-scanparameter-"

/*! Defines the prefix (as a wstring) for scan parameter names. */
#define AFFY_SCAN_PARAMETER_PREFIX_S std::wstring(AFFY_SCAN_PARAMETER_PREFIX)

/*! Defines the static attribute name for the scanner array name. */
#define SCANNER_ARRAY_NAME L"arrayname"

/*! Defines the static attribute name for the array part number. */
#define SCANNER_ARRAY_PART_NUMBER L"array-part-number"

/*! Defines the static attribute name for the media type. */
#define SCANNER_MEDIA_TYPE L"media-type"

/*! Defines the static attribute name for the scanner feature size. */
#define SCANNER_FEATURE_SIZE L"feature-size"

/*! Defines the static attribute name for the scanner array width. */
#define SCANNER_ARRAY_WIDTH L"array-width"

/*! Defines the static attribute name for the scanner array height. */
#define SCANNER_ARRAY_HEIGHT L"array-height"

/*!
Defines the static attribute name for the scanner array focus-width. */
#define SCANNER_ARRAY_FOCUS_WIDTH L"array-focus-width"

/*! Defines the static attribute name for the scanner array focus-height. */
#define SCANNER_ARRAY_FOCUS_HEIGHT L"array-focus-height"

/*! Defines the static attribute name for the scanner array num-wave-lengths. */
#define SCANNER_ARRAY_NUM_WAVE_LENGHTS L"array-num-wave-lengths"

/*! Defines the static attribute name for the scanner array wave-lengths1. */
#define SCANNER_ARRAY_WAVE_LENGHTS1 L"array-wave-lengths1"

/*! Defines the static attribute name for the scanner array wave-lengths2. */
#define SCANNER_ARRAY_WAVE_LENGHTS2 L"array-wave-lengths2"

/*! Defines the static attribute name for the scanner array wave-lengths3. */
#define SCANNER_ARRAY_WAVE_LENGHTS3 L"array-wave-lengths3"

/*! Defines the static attribute name for the scanner array wave-lengths4. */
#define SCANNER_ARRAY_WAVE_LENGHTS4 L"array-wave-lengths4"

/*! Defines the static attribute name for the scanner array wave-lengths5. */
#define SCANNER_ARRAY_WAVE_LENGHTS5 L"array-wave-lengths5"

/*! Defines the static attribute name for the scanner array numx-sub-arrays. */
#define SCANNER_ARRAY_NUMX_SUB_ARRAYS L"array-numx-sub-arrays"

/*! Defines the static attribute name for the scanner array numy-sub-arrays. */
#define SCANNER_ARRAY_NUMY_SUB_ARRAYS L"array-numy-sub-arrays"

/*! Defines the static attribute name for the scanner sub array xoffset-um. */
#define SCANNER_SUBARRAY_XOFFSET_UM L"sub-array-xoffset-um"

/*! Defines the static attribute name for the scanner sub array yoffset-um. */
#define SCANNER_SUBARRAY_YOFFSET_UM L"sub-array-yoffset-um"

/*! Defines the static attribute name for the scanner sub array xspacing-um. */
#define SCANNER_SUBARRAY_XSPACING_UM L"sub-array-xspacing-um"

/*! Defines the static attribute name for the scanner sub array yspacing-um. */
#define SCANNER_SUBARRAY_YSPACING_UM L"sub-array-yspacing-um"

/*!
Defines the static attribute name for the scanner array af target ul-xum. */
#define SCANNER_ARRAY_AF_TARGET_UL_XUM L"array-af-target-ul-xum"

/*! Defines the static attribute name for the scanner array af target ul-yum. */
#define SCANNER_ARRAY_AF_TARGET_UL_YUM L"array-af-target-ul-yum"

/*! Defines the static attribute name for the scanner array af target lr-xum. */
#define SCANNER_ARRAY_AF_TARGET_LR_XUM L"array-af-target-lr-xum"

/*! Defines the static attribute name for the scanner array af target lr-yum. */
#define SCANNER_ARRAY_AF_TARGET_LR_YUM L"array-af-target-lr-yum"

/*! Defines the static attribute name for the scanner array num exposure times. */
#define SCANNER_ARRAY_NUM_EXPOSURE_TIMES L"array-num-exposure-times"

/*! Defines the static attribute name for the scanner array exposure times ms1. */
#define SCANNER_ARRAY_EXPOSURE_TIMES_MS1 L"array-exposure-times-ms1"

/*! Defines the static attribute name for the scanner array exposure times ms2. */
#define SCANNER_ARRAY_EXPOSURE_TIMES_MS2 L"array-exposure-times-ms2"

/*! Defines the static attribute name for the scanner array exposure times ms3. */
#define SCANNER_ARRAY_EXPOSURE_TIMES_MS3 L"array-exposure-times-ms3"

/*! Defines the static attribute name for the scanner array exposure times ms4.
 *  NOTE(review): unlike ms1..ms3 this value carries an "affymetrix-scanner-"
 *  prefix - confirm against the file format whether that is intended.
 */
#define SCANNER_ARRAY_EXPOSURE_TIMES_MS4 L"affymetrix-scanner-array-exposure-times-ms4"

/*! Defines the static attribute name for the algorithm name. */
#define ALGORITHM_NAME_PARAM_NAME L"affymetrix-algorithm-name"

/*! A prefix for chip summary parameter ids. */
#define CHIP_SUMMARY_PARAMETER_NAME_PREFIX L"affymetrix-chipsummary-"

/*! A prefix (as a wstring) for chip summary parameter ids. */
#define CHIP_SUMMARY_PARAMETER_NAME_PREFIX_S std::wstring(CHIP_SUMMARY_PARAMETER_NAME_PREFIX)

/*! Defines the static attribute prefix for algorithm parameter names */
#define ALGORITHM_PARAM_NAME_PREFIX L"affymetrix-algorithm-param-"

/*!
Defines the static attribute prefix (as a wstring) for algorithm parameter names */
#define ALGORITHM_PARAM_NAME_PREFIX_S std::wstring(ALGORITHM_PARAM_NAME_PREFIX)

/*! Defines the static attribute prefix for the application meta data. */
#define APPLICATION_META_INFO_PREFIX L"affymetrix-application-meta-data-info-"

/*! Defines the static attribute prefix (as a wstring) for the application meta data. */
#define APPLICATION_META_INFO_PREFIX_S std::wstring(APPLICATION_META_INFO_PREFIX)

/*! Defines the static attribute name for the DATHeader */
#define DAT_HEADER_PARAM_NAME L"affymetrix-dat-header"

/*! Defines the static attribute name for the partial DATHeader */
#define PARTIAL_DAT_HEADER_PARAM_NAME L"affymetrix-partial-dat-header"

/*! Defines the static attribute name for the max pixel intensity */
#define MAX_PIXEL_INTENSITY_PARAM_NAME L"affymetrix-max-pixel-intensity"

/*! Defines the static attribute name for the min pixel intensity */
#define MIN_PIXEL_INTENSITY_PARAM_NAME L"affymetrix-min-pixel-intensity"

/*! Defines the static attribute name for the orientation */
#define ORIENTATION_PARAM_NAME L"affymetrix-image-orientation"

/*! Defines the static attribute name for the file version. This is not the file format version. */
#define FILE_VERSION_PARAM_NAME L"affymetrix-file-version"

/*! Defines the static attribute name for the flip-flag which indicates if an image is flipped about the y-axis. */
#define FLIP_FLAG_PARAM_NAME L"affymetrix-image-flip-flag"

/*! Defines the static attribute name for the filter wavelength. */
#define FILTER_PARAM_NAME L"affymetrix-filter-wavelength"

/*! Defines the static attribute name for the array id */
#define ARRAY_ID_PARAM_NAME L"affymetrix-array-id"

/*! Defines the static attribute name for the pixel size */
#define PIXEL_SIZE_PARAM_NAME L"affymetrix-pixel-size"

/*! Defines the static attribute name for the scanner type */
#define SCANNER_TYPE_PARAM_NAME L"affymetrix-scanner-type"

/*!
Defines the static attribute name for the scanner id */
#define SCANNER_ID_PARAM_NAME L"affymetrix-scanner-id"

/*! Defines the static attribute name for the scan date */
#define SCAN_DATE_PARAM_NAME L"affymetrix-scan-date"

/*! Defines the static attribute name for the number of pixel rows */
#define ROWS_PARAM_NAME L"affymetrix-pixel-rows"

/*! Defines the static attribute name for the number of pixel cols */
#define COLS_PARAM_NAME L"affymetrix-pixel-cols"

/*! Defines the static attribute name for the number of feature rows */
#define FEATURE_ROWS_NAME L"affymetrix-feature-rows"

/*! Defines the static attribute name for the number of feature cols */
#define FEATURE_COLS_NAME L"affymetrix-feature-cols"

/*! Defines the static attribute for the algorithm version.
 *  Expands to a std::wstring temporary, not a bare wide string literal.
 */
#define ALG_VERSION_PARAM_NAME std::wstring(L"affymetrix-algorithm-version")

/*! Defines the static attribute name for the number of cel rows */
#define CEL_ROWS_PARAM_NAME L"affymetrix-cel-rows"

/*! Defines the static attribute name for the number of cel columns */
#define CEL_COLS_PARAM_NAME L"affymetrix-cel-cols"

/*! Defines the static attribute name for the program company. This is the company that created the program. */
#define PROGRAM_COMPANY L"program-company"

/*! Defines the static attribute name for the program name. This is the program that created the file. */
#define PROGRAM_NAME L"program-name"

/*! Defines the static attribute name for the program id. This is a version string or other identifier of the particular program. */
#define PROGRAM_ID L"program-id"

/*! CDF Data Type Expression (narrow string literal, unlike the wide names above) */
#define AFFY_EXPR_PS "affymetrix-expression-probesets"

/*! CDF Data Type Genotyping (narrow string literal) */
#define AFFY_GENO_PS "affymetrix-genotyping-probesets"

/*! CDF Data Type Tag (narrow string literal) */
#define AFFY_TAG_PS "affymetrix-tag-probesets"

/*! CDF Data Type Resequencing (narrow string literal) */
#define AFFY_RESEQ_PS "affymetrix-resequencing-probesets"

/*! CDF Data Type Control (narrow string literal) */
#define AFFY_CNTRL_PS "affymetrix-control-probesets"

/*!
Defines US English locale. */
#define US_ENGLISH_LOCALE L"en-US"

/*! Defines the reserve length of an affymetrix GUID */
#define AFFY_GUID_LEN 55

/*! Defines the parent file identifier GUID */
#define AFFY_PARENT_FILE_IDENTIFIER L"affymetrix-parent-dat-file-identifier"

/*! Defines the exposure time string */
#define AFFY_EXPOSURE_TIME L"affymetrix-exposure-time"

/*! Defines attribute name prefix for array parameters */
#define AFFY_ARRAY_PARAMETER_PREFIX L"affymetrix-ArrayParameter-"

/*! Defines attribute name prefix (as a wstring) for array parameters */
#define AFFY_ARRAY_PARAMETER_PREFIX_S std::wstring(AFFY_ARRAY_PARAMETER_PREFIX)

/*! Defines attribute name for array parameter, feature columns */
#define FEATURE_COLUMNS L"featureColumns"

/*! Defines attribute name for array parameter, feature rows */
#define FEATURE_ROWS L"featureRows"

/*! Defines attribute name for array parameter, feature setback */
#define FEATURE_SET_BACK L"featureSetback"

/*! Defines attribute name for array parameter, feature height */
#define FEATURE_HEIGHT L"featureHeight"

/*! Defines attribute name for array parameter, feature width */
#define FEATURE_WIDTH L"featureWidth"

/*! Defines attribute name for array parameter, part number */
#define PART_NUMBER L"partNumber"

/*! Defines attribute name for array parameter, test type */
#define TEST_TYPE L"testType"

/*! Defines the attribute name for the file creator. */
#define AFFY_FILE_CREATOR L"affymetrix-file-creator"

/*! Defines attribute name prefix for fluidics parameters */
#define AFFY_FLUIDICS_PARAMETER_PREFIX L"affymetrix-"

/*! Defines attribute name prefix (as a wstring) for fluidics parameters */
#define AFFY_FLUIDICS_PARAMETER_PREFIX_S std::wstring(AFFY_FLUIDICS_PARAMETER_PREFIX)

}

#endif affxparser/src/fusion/calvin_files/parameter/src/CELAlgorithmParameterNames.h0000644000175200017520000000432014516003651030504 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CELAlgorithmParameterNames_HEADER #define _CELAlgorithmParameterNames_HEADER /*! \file CELAlgorithmParameterNames.h This file defines a controlled vocabulary for some of the CEL algorithm parameter names. */ /*! Defines the cell margin algorithm parameter name. Type: int32_t */ #define CELLMARGIN_PARAM_NAME L"CellMargin" /*! Defines the upper left corner x grid coordinate algorithm parameter name. Type: float */ #define GRIDULX_PARAM_NAME L"GridULX" /*! Defines the upper left corner y grid coordinate algorithm parameter name. Type: float */ #define GRIDULY_PARAM_NAME L"GridULY" /*! Defines the upper right corner x grid coordinate algorithm parameter name. Type: float */ #define GRIDURX_PARAM_NAME L"GridURX" /*! Defines the upper right corner y grid coordinate algorithm parameter name. Type: float */ #define GRIDURY_PARAM_NAME L"GridURY" /*! Defines the lower right corner x grid coordinate algorithm parameter name. Type: float */ #define GRIDLRX_PARAM_NAME L"GridLRX" /*! Defines the lower right corner y grid coordinate algorithm parameter name. Type: float */ #define GRIDLRY_PARAM_NAME L"GridLRY" /*! Defines the lower left corner x grid coordinate algorithm parameter name. Type: float */ #define GRIDLLX_PARAM_NAME L"GridLLX" /*! Defines the lower left corner y grid coordinate algorithm parameter name. 
Type: float */ #define GRIDLLY_PARAM_NAME L"GridLLY" #endif affxparser/src/fusion/calvin_files/parameter/src/Parameter.h0000644000175200017520000000763114516003651025335 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _Parameter_HEADER_ #define _Parameter_HEADER_ /*! \file Parameter.h This file provides definitions of parameter values. */ #include "calvin_files/parameter/src/ParameterException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_parameter { /*! A structure to hold a parameter name/value pair. */ typedef struct _tagParameterNameValuePair { /*! The name of the parameter */ std::wstring Name; /*! The value of the parameter */ std::wstring Value; /*! A copy operator. * * @param param The parameter to copy. */ _tagParameterNameValuePair operator=(_tagParameterNameValuePair param) { Name = param.Name; Value = param.Value; return *this; } /*! An equality operator. * * @param param The parameter to compare. 
* @return True if the parameter names are the same. */ bool operator==(_tagParameterNameValuePair param) { if (param.Name == Name) return true; return false; } /*! An equality operator. * * @param name A parameter name to compare. * @return True if the parameter names are the same. */ bool operator==(const std::wstring &name) { if (Name == name) return true; return false; } } ParameterNameValuePair; /*! An STL vector or parameter name value pairs. */ typedef std::vector ParameterNameValuePairVector; /*! An STL list or parameter name value pairs. */ typedef std::list ParameterNameValuePairList; /*! A structure to hold a parameter name/value pair with a controlled vocabulary list. */ typedef struct _tagParameterNameValueControlVocabulary { /*! The name of the parameter */ std::wstring Name; /*! The value of the parameter */ std::wstring Value; /*! The controlled vocabulary */ std::vector ControlledVocabulary; /*! A copy operator. * * @param param The parameter to copy. */ _tagParameterNameValueControlVocabulary operator=(_tagParameterNameValueControlVocabulary param) { Name = param.Name; Value = param.Value; ControlledVocabulary = param.ControlledVocabulary; return *this; } /*! An equality operator. * * @param param The parameter to compare. * @return True if the parameter names are the same. */ bool operator==(_tagParameterNameValueControlVocabulary param) { if (param.Name == Name) return true; return false; } /*! An equality operator. * * @param name A parameter name to compare. * @return True if the parameter names are the same. */ bool operator==(const std::wstring &name) { if (Name == name) return true; return false; } } ParameterNameValueControlVocabulary; /*! An STL vector or parameter name value pairs. */ typedef std::vector ParameterNameValueControlVocabularyVector; /*! An STL list or parameter name value pairs. 
*/ typedef std::list ParameterNameValueControlVocabularyList; } #endif // _Parameter_HEADER_ affxparser/src/fusion/calvin_files/parameter/src/ParameterException.cpp0000644000175200017520000000313714516003651027544 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parameter/src/ParameterException.h" // namespace affymetrix_calvin_exceptions { const std::wstring ParameterMismatchException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::ParameterMismatchException thrown."; } const std::wstring UnexpectedParameterException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::UnexpectedParameterException thrown."; } const std::wstring OutOfRangeParameterException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::OutOfRangeParameterException thrown."; } const std::wstring ParameterStringTooLongException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::ParameterStringTooLongException thrown."; } } affxparser/src/fusion/calvin_files/parameter/src/ParameterException.h0000644000175200017520000000700214516003651027204 
0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ParameterException_HEADER_ #define _ParameterException_HEADER_ #include "calvin_files/exception/src/ExceptionBase.h" // /*! \file ParameterException.h This file defines parameter exceptions. */ namespace affymetrix_calvin_exceptions { /*! This exception indicates the parameter does not match the requested value * TODO: Add member to say what type is expected */ class ParameterMismatchException : public CalvinException { public: ParameterMismatchException() : CalvinException() {} ParameterMismatchException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; /*! 
This exception indicates the parameter type does not match the expected type
 *
 */
class UnexpectedParameterException : public CalvinException
{
public:
    /*! Constructor. The first six arguments are forwarded unchanged to CalvinException.
     * @param expectedType_ Name of the expected type; stored in expectedType.
     */
    UnexpectedParameterException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode, const std::wstring& expectedType_):
        CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode), expectedType(expectedType_){}

    /*! Gets a string describing the exception.
     * @return The string description.
     */
    const std::wstring ToString();

protected:
    /*! String with the name of the expected type */
    std::wstring expectedType;
};

/*! This exception indicates a parameter value is out-of-range */
class OutOfRangeParameterException : public CalvinException
{
public:
    /*! Constructor. The first six arguments are forwarded unchanged to CalvinException.
     * @param lower_ lower limit of the range
     * @param upper_ upper limit of the range
     */
    OutOfRangeParameterException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode, const std::wstring& lower_, const std::wstring& upper_):
        CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode), lower(lower_), upper(upper_) {}

    /*! Gets a string describing the exception.
     * @return The string description.
     */
    const std::wstring ToString();

protected:
    /*! Lower limit of the range, as passed to the constructor. */
    std::wstring lower;

    /*! Upper limit of the range, as passed to the constructor. */
    std::wstring upper;
};

/*! This exception indicates that the parameter string is too long */
class ParameterStringTooLongException : public CalvinException
{
public:
    /*!
Constructor * @param len Max length of the string */ ParameterStringTooLongException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode, int32_t len): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode), maxLen(len) {} const std::wstring ToString(); protected: int32_t maxLen; }; } #endif affxparser/src/fusion/calvin_files/parameter/src/ParameterFileData.cpp0000644000175200017520000000270614516003651027260 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parameter/src/ParameterFileData.h" // using namespace affymetrix_calvin_parameter; /* * Clear the members. */ ParameterFileData::ParameterFileData() { Clear(); } /* * Clear the members. 
*/ void ParameterFileData::Clear() { parameters.clear(); parameterFileAttributes.company.clear(); parameterFileAttributes.userName.clear(); parameterFileAttributes.contentVersion.clear(); implementationAttributes.name.clear(); implementationAttributes.version.clear(); implementationAttributes.executableFileName.clear(); implementationAttributes.description.clear(); } affxparser/src/fusion/calvin_files/parameter/src/ParameterFileData.h0000644000175200017520000000713014516003651026721 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ParameterFileData_HEADER_ #define _ParameterFileData_HEADER_ /*! \file ParameterFileData.h This file provides definitions for data in a parameter file. */ #include #include #include // namespace affymetrix_calvin_parameter { /*! The attributes in the ParameterFile element. */ typedef struct _ParameterFileAttributesType { std::wstring company; /*! The company name. */ std::wstring userName; /*! The user name. */ std::wstring contentVersion; /*! The content version. */ } ParameterFileAttributesType; /*! The attributes in the Implementation element. */ typedef struct _ImplementationAttributesType { std::wstring name; /*! 
The name. */ std::wstring version; /*! The version number. */ std::wstring executableFileName; /*! The exe file name. */ std::wstring description; /*! The description */ } ImplementationAttributesType; /*! The attributes in the Parameter element. */ typedef struct _ParameterType { std::wstring name; /*! The name. */ std::wstring index; /*! The index. */ std::wstring displayName; /*! The display name. */ std::wstring category; /*! The category. */ std::wstring isEditable; /*! The idEditable flag */ std::wstring type; /*! The type */ std::wstring currentValue; /*! The current value */ std::wstring minValue; /*! The minimum value */ std::wstring maxValue; /*! The maximum value */ std::wstring defaultValue; /*! The default value */ std::wstring precision; /*! The precision for floating point numbers */ std::wstring maxLength; /*! The maximum length of a string value */ std::wstring description; /*! The description */ } ParameterType; /*! A list of parameters. */ typedef std::list ParameterTypeList; /*! Provides data storage for ParameterFile files. This version does not store all of the contents of a parameter file. Missing items includes the ParameterSet, MetaData and Control elements. */ class ParameterFileData { public: /*! Clears the members in the class. */ void Clear(); /*! Constructor. */ ParameterFileData(); /*! The parameter file attributes. */ ParameterFileAttributesType &ParameterFileAttributes() { return parameterFileAttributes; } /*! The implementation attributes. */ ImplementationAttributesType &ImplementationAttributes() { return implementationAttributes; } /*! The list of parameters. */ ParameterTypeList &Parameters() { return parameters; } private: /*! The parameter file attributes. */ ParameterFileAttributesType parameterFileAttributes; /*! The implementation attributes. */ ImplementationAttributesType implementationAttributes; /*! The list of parameters. 
*/ ParameterTypeList parameters; }; } #endif // _ParameterFileData_HEADER_ affxparser/src/fusion/calvin_files/parameter/src/ParameterNameValueType.cpp0000644000175200017520000004757714516003651030345 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _MSC_VER #include "windows.h" #endif #include "calvin_files/parameter/src/ParameterNameValueType.h" // #include "calvin_files/utils/src/StringUtils.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #else #include #include #include #endif /*! Custom MIME types */ const wchar_t Int8MIMEType[] = L"text/x-calvin-integer-8"; const wchar_t UInt8MIMEType[] = L"text/x-calvin-unsigned-integer-8"; const wchar_t Int16MIMEType[] = L"text/x-calvin-integer-16"; const wchar_t UInt16MIMEType[] = L"text/x-calvin-unsigned-integer-16"; const wchar_t Int32MIMEType[] = L"text/x-calvin-integer-32"; const wchar_t UInt32MIMEType[] = L"text/x-calvin-unsigned-integer-32"; const wchar_t FloatMIMEType[] = L"text/x-calvin-float"; const wchar_t TextMIMEType[] = L"text/plain"; const wchar_t AsciiMIMEType[] = L"text/ascii"; const u_int32_t NUMBER_BUFFER_LEN = 16; /*! 
Type table. Order matches the ParameterType enum */ static const wchar_t* TypeTable[] = { Int8MIMEType, UInt8MIMEType, Int16MIMEType, UInt16MIMEType, Int32MIMEType, UInt32MIMEType, FloatMIMEType, TextMIMEType, AsciiMIMEType }; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; MIMEValue::MIMEValue() { arr = 0; arrSize = 0; } MIMEValue::MIMEValue(const void* value, u_int32_t size) { arr = new char[size]; memcpy(arr, value, size); arrSize = size; } MIMEValue::MIMEValue(const MIMEValue& source) { arr = 0; *this = source; } MIMEValue::~MIMEValue() { delete [] arr; } MIMEValue& MIMEValue::operator=(const MIMEValue& source) { if (&source != this) { delete [] arr; arr = new char[source.arrSize]; memcpy(arr, source.arr, source.arrSize); arrSize = source.arrSize; } return *this; } bool MIMEValue::operator==(const MIMEValue& lhs) { if (lhs.arrSize != arrSize) return false; return (memcmp(lhs.arr, arr, arrSize) == 0); } bool MIMEValue::operator!=(const MIMEValue& lhs) { return !(*this==lhs); } void MIMEValue::SetValue(const void* value, u_int32_t size) { delete [] arr; arr = new char[size]; memset(arr, 0, size); memcpy(arr, value, size); arrSize = size; } const void* MIMEValue::GetValue(u_int32_t& size) const { size = arrSize; return arr; } /* * MIME constructor. Useful when reading in from a file. No interpretation attempted. */ ParameterNameValueType::ParameterNameValueType(const std::wstring& name, const void* mimeValue, int32_t mimeValueSize, const std::wstring& mimeType) : Name(name), Type(mimeType), Value(mimeValue, mimeValueSize) { } /* * MIME constructor. Useful when reading in from a file. No interpretation attempted. 
*/ ParameterNameValueType::ParameterNameValueType(const std::wstring& name, const MIMEValue& mimeValue, const std::wstring& mimeType) : Name(name), Type(mimeType), Value(mimeValue) { } /* * Get the parameter type */ ParameterNameValueType::ParameterType ParameterNameValueType::GetParameterType() const { for (int i=0; i len) maxLn = (u_int32_t)reserve; u_int16_t* buf = new u_int16_t[maxLn]; for (u_int32_t i=0; i len) maxLn = (u_int32_t)reserve; u_int16_t* buf = new u_int16_t[maxLn]; for (u_int32_t i=0; i #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_parameter { /*! A class to hold a MIME value * Is responsible for managing the memory for the mime value */ class MIMEValue { public: /*! Constructs a MIME value - default constructor */ MIMEValue(); /*! Constructs a MIME value * @param value A pointer to array containing the MIME encoded value * @param size The size of the array */ MIMEValue(const void* value, u_int32_t size); /*! Copy constructor * @param source MIMEValue object to copy */ MIMEValue(const MIMEValue& source); /*! Destructor */ ~MIMEValue(); /*! Assignment operator * @param source The source. * @return The copied object. */ MIMEValue& operator=(const MIMEValue& source); /*! Operator equals * @param lhs The left hand side to compare. * @return True if the same. */ bool operator==(const MIMEValue& lhs); /*! Operator not equals. * @param lhs The let hand side to compare. * @return True if not equal. */ bool operator!=(const MIMEValue& lhs); /*! Set the controlled value * @param value A pointer to array containing the MIME encoded value * @param size The size of the array */ void SetValue(const void* value, u_int32_t size); /*! Get the controlled value * @param size Gets filled with the size of the constrolled array * @ return Const pointer to the MIME value array. 
(for writing)*/ const void* GetValue(u_int32_t& size) const; /*! Gets the size of the array */ u_int32_t Size() const { return arrSize; } private: /*! The buffer to hold the data. */ char* arr; /*! The size of the buffer. */ u_int32_t arrSize; }; /*! A class to hold a parameter name/value/type. * This class will convert several built-in types between the MIME string and their native types. */ class ParameterNameValueType { public: /*! Default constructor */ ParameterNameValueType() {} /*! MIME constructor. Useful when reading in from a file * @param name Parameter name * @param mimeValue MIME encoded value in a buffer * @param mimeValueSize The size in bytes of the MIME encoded value. * @param mimeType */ ParameterNameValueType(const std::wstring& name, const void* mimeValue, int32_t mimeValueSize, const std::wstring& mimeType); /*! MIME constructor. Useful when reading in from a file * @param name Parameter name * @param mimeValue MIME encoded value * @param mimeType */ ParameterNameValueType(const std::wstring& name, const MIMEValue& mimeValue, const std::wstring& mimeType); // Default copy constructor is fine /*! A copy operator. * @param param The parameter to copy. */ ParameterNameValueType operator=(const ParameterNameValueType& param) { Name = param.Name; Value = param.Value; Type = param.Type; return *this; } /*! An equality operator. Compares the parameter name. * @param param The parameter to compare. * @return True if the parameter names are the same. */ bool operator==(const ParameterNameValueType& param) const { if (param.Name == Name) return true; return false; } /*! An inequality operator. Compares the parameter name. * @param param The parameter to compare. * @return True if the parameter names are different. */ bool operator!=(const ParameterNameValueType& param) const { if (param.Name == Name) return false; return true; } /*! An equality operator. Compares the parameter name. * @param name A parameter name to compare. 
* @return True if the parameter names are the same. */ bool operator==(const std::wstring &name) const { if (Name == name) return true; return false; } /*! An inequality operator. Compares the parameter name. * @param name A parameter name to compare. * @return True if the parameter names are diffenent. */ bool operator!=(const std::wstring &name) const { if (Name == name) return false; return true; } /*! Less than comparison operator. Compares the parameter name. * @param param The parameter to compare. * @return True if the target parameter name is lexically less than param. */ bool operator<(const ParameterNameValueType ¶m) const { return (Name < param.Name); } /*! Greater than comparison operator. Compares the parameter name. * @param param The parameter to compare. * @return True if the target parameter name is lexically greter than param. */ bool operator>(const ParameterNameValueType ¶m) const { return (Name > param.Name); } /*! Get the name of the parameter * @ Name of the parameter */ std::wstring GetName() const { return Name; } /*! Set the name of the parameter. * @param value Name of the parameter. */ void SetName(const std::wstring& value) { Name = value; } /*! Enumerant of the built-in parameter types. */ enum ParameterType { Int8Type, /*! an 8 bit integer. */ UInt8Type, /*! an 8 bit unsigned integer. */ Int16Type, /*! a 16 bit integer. */ UInt16Type, /*! a 16 bit unsigned integer. */ Int32Type, /*! a 32 bit integer. */ UInt32Type, /*! a 32 bit unsigned integer. */ FloatType, /*! a 32 bit floating point. */ TextType, /*! a 16 bit character. */ AsciiType, /*! an 8 bit character. */ UnknownType /*! an 8 bit integer. */ }; /*! Get the parameter type * @return The parameter type of the object.*/ ParameterType GetParameterType() const; /*! Gets value as a int8_t * @return Value of the parameter as an int8_t * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not an int8_t */ int8_t GetValueInt8() const; /*! 
Sets the value as an int8_t
     * @param value
     */
    void SetValueInt8(int8_t value);

    /*! Gets value as a u_int8_t
     * @return Value of the parameter as a u_int8_t
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a u_int8_t
     */
    u_int8_t GetValueUInt8() const;

    /*! Sets the value as a u_int8_t
     * @param value The new value.
     */
    void SetValueUInt8(u_int8_t value);

    /*! Gets value as an int16_t
     * @return Value of the parameter as an int16_t
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not an int16_t
     */
    int16_t GetValueInt16() const;

    /*! Sets the value as an int16_t
     * @param value The new value.
     */
    void SetValueInt16(int16_t value);

    /*! Gets value as a u_int16_t
     * @return Value of the parameter as a u_int16_t
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a u_int16_t
     */
    u_int16_t GetValueUInt16() const;

    /*! Sets the value as a u_int16_t
     * @param value The new value.
     */
    void SetValueUInt16(u_int16_t value);

    /*! Gets value as an int32_t
     * @return Value of the parameter as an int32_t
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not an int32_t
     */
    int32_t GetValueInt32() const;

    /*! Sets the value as an int32_t
     * @param value The new value.
     */
    void SetValueInt32(int32_t value);

    /*! Gets value as a u_int32_t
     * @return Value of the parameter as a u_int32_t
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a u_int32_t
     */
    u_int32_t GetValueUInt32() const;

    /*! Sets the value as a u_int32_t
     * @param value The new value.
     */
    void SetValueUInt32(u_int32_t value);

    /*! Gets value as a float
     * @return Value of the parameter as a float
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a float
     */
    float GetValueFloat() const;

    /*! Sets the value as a float
     * @param value The new value.
     */
    void SetValueFloat(float value);

    /*!
Gets value as a wstring
     * @return Value of the parameter as an wstring
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a text type
     */
    std::wstring GetValueText() const;

    /*! Sets the value as a text type.
     * @param value String representation of the value.
     * @param reserve The total number of characters to reserve for the value. -1 indicates not to reserve any extra space.
     */
    void SetValueText(const std::wstring &value, int32_t reserve = -1);

    /*! Gets value as a string
     * @return Value of the parameter as a string
     * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a text type
     */
    std::string GetValueAscii() const;

    /*! Sets the value from an 8 bit character string.
     * @param value String representation of the value.
     * @param reserve The total number of characters to reserve for the value. -1 indicates not to reserve any extra space.
     */
    void SetValueAscii(const std::string &value, int32_t reserve = -1);

    // Raw MIME methods

    /*! Returns the mime type without interpretation.
     * @return String of the MIME type
     */
    std::wstring GetMIMEType() const { return Type; }

    /*! Sets the mime type without attempting to interpret it.
     * @param value The MIME type string to store.
     */
    void SetMIMEType(const std::wstring& value) { Type = value; }

    /*! Returns the mime value without interpretation.
     * @return A copy of the stored MIME value.
     */
    MIMEValue GetMIMEValue() const { return Value; }

    /*! Sets the mime value without attempting to interpret it.
     * @param value The MIME value to store.
     */
    void SetMIMEValue(const MIMEValue& value) { Value = value; }

    /*! Converts the value of known types to a string
     * @return A string representation of the value
     */
    std::wstring ToString() const;

protected:
    /*! Converts a value to an integer.
     * @param mimeType The string representation.
     * @return The integer representation.
     */
    u_int32_t ValueToInt(const wchar_t* mimeType) const;

    /*! Converts an integer to a value.
     * @param value The integer representation.
     * @param type The string representation.
*/
	void IntToValue(u_int32_t value, const wchar_t* type);

protected:
	/*! The name of the parameter */
	std::wstring Name;

	/*! The MIME type of the parameter */
	std::wstring Type;

	/*! The MIME value of the parameter */
	MIMEValue Value;
};

/* NOTE(review): the container typedefs in this header carry no template
 * arguments in this copy of the file. Presumably they were lost when the
 * text was extracted and the element type is the class named in each
 * comment -- confirm against the upstream header before building. */

/*! An STL vector of parameter name value types. */
typedef std::vector ParameterNameValueTypeVector;

/*! An STL list of parameter name value types. */
typedef std::list ParameterNameValueTypeList;

/*! An STL constant iterator of ParameterNameValueType */
typedef std::vector::const_iterator ParameterNameValueTypeConstIt;

/*! An STL iterator of ParameterNameValueType */
typedef std::vector::iterator ParameterNameValueTypeIt;

/*! A class to hold name/value/type/default/required attributes.
 *
 * Extends ParameterNameValueType with a default value, a "required" flag,
 * a value-type tag and lists of controlled-vocabulary values.
 */
class ParameterNameValueDefaultRequiredType : public ParameterNameValueType
{
public:
	/*! The type of value. */
	enum ParameterValueType {
		NoParameterType,		/*! Not set. */
		IntegerParameterType,	/*! Integer stored in the MIME value. */
		FloatParameterType,		/*! Floating point stored in the MIME value. */
		TextParameterType,		/*! Text stored in the MIME value. */
		DateParameterType,		/*! Date stored in the MIME value as text. */
		TimeParameterType,		/*! Time stored in the MIME value as text. */
		DateTimeParameterType,	/*! DateTime stored in the MIME value as text. */
		ControlSingleParameterType,	/*! Controlled single selection stored in the MIME value as text. */
		ControlMultiParameterType	/*! Controlled multi selection stored in the controlMulti list. */
	};

	/*! Converts the type to a string.
	 * @param value The type.
	 * @return The string representation.
	 */
	static std::wstring ParameterValueTypeToString(ParameterValueType value);

	/*! Converts the string to a type.
	 * @param value The string representation.
	 * @return The type.
	 */
	static ParameterValueType ParameterValueTypeFromString(const std::wstring &value);

protected:
	/*! The MIME value of the default parameter */
	MIMEValue defaultValue;

	/*! A flag to indicate if a default exists. */
	bool hasDefault;

	/*! A flag to indicate if the parameter is required. */
	bool required;

	/*! A list of parameter values for controlled vocabulary. */
	std::list controlled;

	/*! A list of multi-selected controlled values. */
	std::list controlMultiValues;

	/*! The type of value stored. */
	ParameterValueType valueType;

	/*! Converts the default value to an integer.
	 * @param mimeType The string representation.
	 * @return The integer representation.
	 */
	u_int32_t DefaultValueToInt(const wchar_t* mimeType) const;

	/*! Converts an integer to a default value.
	 * @param value The integer representation.
	 * @param type The string representation.
	 */
	void IntToDefaultValue(u_int32_t value, const wchar_t* type);

	/*! Sets the parameter type. */
	void SetParameterType();

public:
	/*! A list of multi-selected controlled values.
	 * @return A mutable reference to the multi-selected controlled values.
	 */
	std::list &ControlMultiValues() { return controlMultiValues; }

	/*! The type of value stored.
	 * @return A mutable reference to the value type.
	 */
	ParameterValueType &ValueType() { return valueType; }

	/*! A list of parameter values for controlled vocabulary.
	 * @return A mutable reference to the list of parameter values for controlled vocabulary.
	 */
	std::list &ControlledVocabulary() { return controlled; }

	/*! Returns the default MIME value without interpretation.
	 * @return A mutable reference to the MIME encoded default value.
	 */
	MIMEValue &DefaultMIMEValue() { return defaultValue; }

	/*! Gets the required flag.
	 * @return A mutable reference to the required flag.
	 */
	bool &RequiredFlag() { return required; }

	/*! A flag to indicate if a default exists.
	 * @return A mutable reference to the flag indicating if a default value exists.
	 */
	bool &HasDefault() { return hasDefault; }

	/*! Default constructor. Marks the parameter as not required, without a default, and of no type. */
	ParameterNameValueDefaultRequiredType() { required = false; hasDefault = false; valueType = NoParameterType; }

	/*! MIME constructor. Useful when reading in from a file
	 * @param name Parameter name
	 * @param mimeValue MIME encoded value in a buffer
	 * @param mimeValueSize The size in bytes of the MIME encoded value.
	 * @param mimeType The MIME type.
	 * @param defaultMimeValue The default MIME encoded value in a buffer.
	 * @param defaultMimeValueSize The size in bytes of the default MIME encoded value.
	 * @param req Flag to indicate if the parameter is required.
	 */
	ParameterNameValueDefaultRequiredType(const std::wstring& name, const void* mimeValue, int32_t mimeValueSize, const std::wstring& mimeType, const void *defaultMimeValue, int32_t defaultMimeValueSize, bool req);

	/*! MIME constructor. Useful when reading in from a file
	 * @param name Parameter name
	 * @param mimeValue MIME encoded value
	 * @param mimeType The MIME type.
	 * @param defaultMimeValue The default MIME encoded value.
	 * @param req Flag to indicate if the parameter is required.
	 */
	ParameterNameValueDefaultRequiredType(const std::wstring& name, const MIMEValue& mimeValue, const std::wstring& mimeType, const MIMEValue &defaultMimeValue, bool req);

	/*! A copy (assignment) operator.
	 * Note that the result is declared to be returned by value, not by reference.
	 * @param param The parameter to copy.
	 */
	ParameterNameValueDefaultRequiredType operator=(const ParameterNameValueDefaultRequiredType& param);

	/*! Converts default value to a string
	 * @return A string representation of the default value
	 */
	std::wstring DefaultToString();

	/*! Gets the default value as an int8_t
	 * @return Value of the parameter as an int8_t
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not an int8_t
	 */
	int8_t GetDefaultValueInt8() const;

	/*! Sets the default value as an int8_t
	 * @param value The value
	 */
	void SetDefaultValueInt8(int8_t value);

	/*! Gets the default value as a u_int8_t
	 * @return Value of the parameter as a u_int8_t
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a u_int8_t
	 */
	u_int8_t GetDefaultValueUInt8() const;

	/*! Sets the default value as a u_int8_t
	 * @param value The value
	 */
	void SetDefaultValueUInt8(u_int8_t value);

	/*! Gets the default value as an int16_t
	 * @return Value of the parameter as an int16_t
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not an int16_t
	 */
	int16_t GetDefaultValueInt16() const;

	/*! Sets the default value as an int16_t
	 * @param value The value
	 */
	void SetDefaultValueInt16(int16_t value);

	/*! Gets the default value as a u_int16_t
	 * @return Value of the parameter as a u_int16_t
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a u_int16_t
	 */
	u_int16_t GetDefaultValueUInt16() const;

	/*! Sets the default value as a u_int16_t
	 * @param value The value
	 */
	void SetDefaultValueUInt16(u_int16_t value);

	/*! Gets the default value as an int32_t
	 * @return Value of the parameter as an int32_t
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not an int32_t
	 */
	int32_t GetDefaultValueInt32() const;

	/*! Sets the default value as an int32_t
	 * @param value The value
	 */
	void SetDefaultValueInt32(int32_t value);

	/*! Gets the default value as a u_int32_t
	 * @return Value of the parameter as a u_int32_t
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a u_int32_t
	 */
	u_int32_t GetDefaultValueUInt32() const;

	/*! Sets the default value as a u_int32_t
	 * @param value The value
	 */
	void SetDefaultValueUInt32(u_int32_t value);

	/*! Gets the default value as a float
	 * @return Value of the parameter as a float
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a float
	 */
	float GetDefaultValueFloat() const;

	/*! Sets the default value as a float
	 * @param value The value
	 */
	void SetDefaultValueFloat(float value);

	/*! Gets the default value as a wstring
	 * @return Value of the parameter as a wstring
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a text type
	 */
	std::wstring GetDefaultValueText() const;

	/*! Sets the default value as a text type.
	 * @param value String representation of the default value.
	 * @param reserve The total number of characters to reserve for the value. -1 indicates not to reserve any extra space.
	 */
	void SetDefaultValueText(const std::wstring &value, int32_t reserve=-1);

	/*! Gets the default value as a string
	 * @return Value of the parameter as a string
	 * @exception affymetrix_calvin_exceptions::ParameterMismatchException Parameter is not a text type
	 */
	std::string GetDefaultValueAscii() const;

	/*! Sets the default value as a text type.
	 * @param value String representation of the default value.
	 * @param reserve The total number of characters to reserve for the value. -1 indicates not to reserve any extra space.
	 */
	void SetDefaultValueAscii(const std::string &value, int32_t reserve=-1);
};

/*! An STL vector of parameter name value default required types. */
typedef std::vector ParameterNameValueDefaultRequiredTypeVector;

/*! An STL list of parameter name value default required types. */
typedef std::list ParameterNameValueDefaultRequiredTypeList;

/*! An STL constant iterator of ParameterNameValueDefaultRequiredType */
typedef std::vector::const_iterator ParameterNameValueDefaultRequiredTypeConstIt;

/*! An STL iterator of ParameterNameValueDefaultRequiredType */
typedef std::vector::iterator ParameterNameValueDefaultRequiredTypeIt;

}

#endif //_ParameterNameValueType_HEADER_
affxparser/src/fusion/calvin_files/parsers/0000755000175200017520000000000014516003651022145 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/parsers/src/0000755000175200017520000000000014516022540022732 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/parsers/src/ArrayFileReader.cpp0000644000175200017520000000633214516003651026445 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/ArrayFileReader.h" // #include "calvin_files/parsers/src/SAXArrayHandlers.h" // #include #include // #include #include // using namespace affymetrix_calvin_array; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; XERCES_CPP_NAMESPACE_USE; /* * Initialize the class. */ ArrayFileReader::ArrayFileReader() { } /* * Clear the data. */ ArrayFileReader::~ArrayFileReader() { } /* * Read the entire file using the XML SAX parser. 
*/ bool ArrayFileReader::Read(const std::string &fileName, affymetrix_calvin_array::ArrayData &arrayData, bool headerOnly) { arrayData.Clear(); // Initialize the XML4C2 system try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } bool status = false; SAXParser* parser = new SAXParser; parser->setValidationScheme(SAXParser::Val_Never); parser->setLoadExternalDTD(false); parser->setDoNamespaces(false); parser->setDoSchema(false); parser->setValidationSchemaFullChecking(false); SAXArrayHandlers handler(&arrayData, headerOnly); parser->setDocumentHandler(&handler); parser->setErrorHandler(&handler); try { parser->parse(fileName.c_str()); int errorCount = parser->getErrorCount(); if (errorCount == 0) { status = true; fileVersionNumber = handler.FileVersionNumber(); } } catch (SAXArrayStopParsingException) { status = true; fileVersionNumber = handler.FileVersionNumber(); } catch (...) { status = false; } delete parser; XMLPlatformUtils::Terminate(); return status; } /* * Check if the data type matches what is in the file. */ bool ArrayFileReader::IsFileType(const std::string &fileName, const affymetrix_calvin_utilities::AffymetrixGuidType &dataTypeId) { return (ArrayFileReader::DataTypeIdentifier(fileName) == dataTypeId); } /* * Read just the first few entries to determine if this file is * of the right type. Check the magic number, version number * and data type identifier. If they all match then this is the right * type of file. */ affymetrix_calvin_utilities::AffymetrixGuidType ArrayFileReader::DataTypeIdentifier(const std::string &fileName) { ArrayFileReader reader; ArrayData arrayData; reader.Read(fileName, arrayData, true); return arrayData.DataTypeIdentifier(); } affxparser/src/fusion/calvin_files/parsers/src/ArrayFileReader.h0000644000175200017520000000505014516003651026106 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _ArrayFileReader_HEADER_
#define _ArrayFileReader_HEADER_

/*! \file ArrayFileReader.h This file provides interfaces to read an array file.
 */

#include "calvin_files/array/src/ArrayData.h"
#include "calvin_files/parameter/src/Parameter.h"
#include "calvin_files/portability/src/AffymetrixBaseTypes.h"
#include "calvin_files/utils/src/AffymetrixGuid.h"
//
// NOTE(review): the three #include directives below have no header name in
// this copy of the file; presumably the <...> names were lost in extraction.
#include
#include
#include
//

namespace affymetrix_calvin_io
{

/*! This class provides interfaces to read an array file. */
class ArrayFileReader
{
public:
	/*! Constructor */
	ArrayFileReader();

	/*! Destructor */
	~ArrayFileReader();

protected:
	/*! The file's version number. */
	std::wstring fileVersionNumber;

public:
	/*! Reads the contents of the file.
	 *
	 * @param fileName The name of the array file to read.
	 * @param arrayData The array data to read from the file.
	 * @param headerOnly Flag to indicate that only the header should be read.
	 * @return True if the file was read successfully.
	 */
	bool Read(const std::string &fileName, affymetrix_calvin_array::ArrayData &arrayData, bool headerOnly=false);

	/*! Determines if a file is of the specified type.
	 *
	 * @param fileName The name of the file to check.
	 * @param dataTypeId An identifier to the type of data.
	 * @return True if the file matches the type desired.
	 */
	static bool IsFileType(const std::string &fileName, const affymetrix_calvin_utilities::AffymetrixGuidType &dataTypeId);

	/*! The identifier of the type of data stored in the file.
	 *
	 * @param fileName The name of the file to check.
	 * @return The identifier of the type of data.
	 */
	static affymetrix_calvin_utilities::AffymetrixGuidType DataTypeIdentifier(const std::string &fileName);
};

};

#endif // _ArrayFileReader_HEADER_
affxparser/src/fusion/calvin_files/parsers/src/AuditFileConstants.h0000644000175200017520000000326714516003651026660 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _AuditFileConstants_HEADER_
#define _AuditFileConstants_HEADER_

/*! \file AuditFileConstants.h This file provides constants for an audit file.
 */

namespace affymetrix_calvin_io
{

/*! The tag for the user name */
#define AUDIT_USER_TAG "User"

/*! The tag for the date */
#define AUDIT_DATE_TAG "Date"

/*! The tag for the time */
#define AUDIT_TIME_TAG "Time"

/*! The tag for the action type */
#define AUDIT_ACTION_TAG "Action"

/*! The tag for the array id */
#define AUDIT_ARRAY_ID_TAG "ArrayId"

/*! The tag for an input id */
#define AUDIT_INPUT_ID_TAG "InputId"

/*!
The tag for an output id */ #define AUDIT_OUTPUT_ID_TAG "OutputId" /*! The tag for the start of an audit entry */ #define AUDIT_START_TAG "#Audit-Start" /*! The tag for the end of an audit entry */ #define AUDIT_END_TAG "#Audit-End" }; #endif // _AuditFileConstants_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/AuditFileReader.cpp0000644000175200017520000000667414516003651026446 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "calvin_files/parsers/src/AuditFileReader.h" // #include "calvin_files/parsers/src/AuditFileConstants.h" #include "calvin_files/utils/src/StringUtils.h" // #include "util/Fs.h" // #include #include #include #include // using namespace affymetrix_calvin_array; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; using namespace std; /* * Initialize the class. */ AuditFileReader::AuditFileReader() { } /* * Clear the data. */ AuditFileReader::~AuditFileReader() { } /* * Read the entire file. */ bool AuditFileReader::Read(const std::string &fileName, affymetrix_calvin_array::ArrayAuditEntryList &auditData) { auditData.clear(); // Open the file. 
std::ifstream fileStream; Fs::aptOpen(fileStream, fileName); if (!fileStream) { return false; } ArrayAuditEntry entry; DateTime dateTime; const int MAX_LINE_LENGTH = 1024; char buf[MAX_LINE_LENGTH]; std::string line; std::string name; std::string value; // Read the lines of the file. while (fileStream.getline(buf, MAX_LINE_LENGTH)) { line = buf; // Start tag if (line == AUDIT_START_TAG) { dateTime.Clear(); entry.Clear(); continue; } // End tag else if (line == AUDIT_END_TAG) { entry.DateTime() = dateTime; auditData.push_back(entry); continue; } // Split the line to the name and value separated by a = sign const char *index = strstr(line.c_str(), "="); if (index) { value = index+1; name = line.substr(0, line.length()-strlen(index)); // User name tag if (name == AUDIT_USER_TAG) entry.UserName() = StringUtils::ConvertMBSToWCS(value); // Date time else if (name == AUDIT_DATE_TAG) dateTime.Date(StringUtils::ConvertMBSToWCS(value)); // Date time else if (name == AUDIT_TIME_TAG) dateTime.Time(StringUtils::ConvertMBSToWCS(value)); // Action else if (name == AUDIT_ACTION_TAG) entry.ActionType() = value; // Array id else if (name == AUDIT_ARRAY_ID_TAG) entry.ArrayGuid() = value; // Input id else if (name == AUDIT_INPUT_ID_TAG) entry.InputFileGuids().push_back(value); // Output id else if (name == AUDIT_OUTPUT_ID_TAG) entry.OutputFileGuids().push_back(value); // Parameter name/value pair. else { ParameterNameValuePair param; param.Name = StringUtils::ConvertMBSToWCS(name); param.Value = StringUtils::ConvertMBSToWCS(value); entry.ActionParameters().push_back(param); } } } // Close the file and return the status fileStream.close(); return (fileStream.fail() != 0); } affxparser/src/fusion/calvin_files/parsers/src/AuditFileReader.h0000644000175200017520000000306014516003651026075 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AuditFileReader_HEADER_ #define _AuditFileReader_HEADER_ /*! \file AuditFileReader.h This file provides interfaces to read an audit file. */ #include "calvin_files/array/src/ArrayAudit.h" // namespace affymetrix_calvin_io { /*! This class provides interfaces to read an audit file. */ class AuditFileReader { public: /*! Constructor */ AuditFileReader(); /*! Destructor */ ~AuditFileReader(); public: /*! Reads the entire contents of the file. * * @param fileName The name of the audit file to read. * @param auditData The audit data to read from the file. */ bool Read(const std::string &fileName, affymetrix_calvin_array::ArrayAuditEntryList &auditData); }; }; #endif // _AuditFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/CDFFileReader.cpp0000644000175200017520000000346614516003651025770 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/CDFFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Constructor */ CDFFileReader::CDFFileReader() { } /* * Destructor */ CDFFileReader::~CDFFileReader() { } /* */ void CDFFileReader::Read(CDFData& data, ReadMode mode) { data.Clear(); //data.GetGenericData().UseMemoryMapping(false); GenericFileReader reader; if (fileName.empty()) fileName = data.GetFilename(); reader.SetFilename(fileName); reader.ReadHeader(data.GetGenericData(), GenericFileReader::ReadNoDataGroupHeader); switch(mode) { case ReadSequential: data.PrepareForSequentialAccess(); break; case ReadByProbeSetNumber: data.PrepareForAccessByProbeSetIndex(); break; case ReadByProbeSetName: data.PrepareForAccessByProbeSetName(); break; default: data.PrepareForSequentialAccess(); break; } } affxparser/src/fusion/calvin_files/parsers/src/CDFFileReader.h0000644000175200017520000000502414516003651025425 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFFileReader_HEADER_ #define _CDFFileReader_HEADER_ /*! \file CDFFileReader.h This file provides methods to read a CDF library file. */ //#include "FileException.h" //#include "DataGroupReader.h" #include "calvin_files/data/src/CDFData.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads a CDF library file. It is an interpreter class.*/ class CDFFileReader { public: enum ReadMode { ReadSequential, ReadByProbeSetNumber, ReadByProbeSetName } ; public: /*! Constructor */ CDFFileReader(); /*! Destructor */ ~CDFFileReader(); public: /*! Gets the name of the input file. * * @return The name of the file to read. */ const std::string GetFilename() const { return fileName; } /*! Sets the name of the input file. * * @param name The name of the file to read. */ void SetFilename(const std::string &name) { fileName = name; } /*! Reads the file header of the generic file. * * @param data A reference to a CDFData object that will receive information from the file. * @param mode Indicates how the CDF data will be accessed * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. 
* @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Read(CDFData& data, ReadMode mode=ReadSequential); protected: /*! Name of the file to read */ std::string fileName; }; } #endif // _CDFFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/CHPFileReader.cpp0000644000175200017520000000263614516003651026004 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/CHPFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Constructor */ CHPFileReader::CHPFileReader() { } /* * Destructor */ CHPFileReader::~CHPFileReader() { } void CHPFileReader::Read(CHPData& data) { data.Clear(); GenericFileReader reader; if (fileName.empty()) { fileName = data.GetFilename(); } reader.SetFilename(fileName); reader.ReadHeader(data.GetGenericData()); } affxparser/src/fusion/calvin_files/parsers/src/CHPFileReader.h0000644000175200017520000000465014516003651025447 
0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _CHPFileReader_HEADER_
#define _CHPFileReader_HEADER_

/*! \file CHPFileReader.h This file provides methods to read a CHP data file.
 */

//#include "FileException.h"
#include "calvin_files/data/src/CHPData.h"
#include "calvin_files/parsers/src/DataGroupReader.h"
//
// NOTE(review): the three #include directives below have no header name in
// this copy of the file; presumably the <...> names were lost in extraction.
#include
#include
#include
//

#ifdef _MSC_VER
#pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations.
#endif

namespace affymetrix_calvin_io
{
/*! This class reads a CHP data file. It is an interpreter class.*/
class CHPFileReader
{
public:
	/*! Constructor */
	CHPFileReader();

	/*! Destructor */
	~CHPFileReader();

public:
	/*! Gets the name of the input file.
	 *
	 * @return The name of the file to read (returned as a copy).
	 */
	const std::string GetFilename() const { return fileName; }

	/*! Sets the name of the input file.
	 *
	 * @param name The name of the file to read.
	 */
	void SetFilename(const std::string &name) { fileName = name; }

	/*! Reads the file header of the generic file and reads all the DataPlaneHeader information.
	 *
	 * @param data A reference to a CHPData object that will receive information from the file.
	 * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist.
	 * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match.
	 * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type.
	 */
	void Read(CHPData& data);

protected:
	/*! Name of the file to read */
	std::string fileName;
};

}

#endif // _CHPFileReader_HEADER_
affxparser/src/fusion/calvin_files/parsers/src/CHPMultiDataFileReader.cpp0000644000175200017520000000273514516003651027611 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2007 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/CHPMultiDataFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Constructor */ CHPMultiDataFileReader::CHPMultiDataFileReader() { } /* * Destructor */ CHPMultiDataFileReader::~CHPMultiDataFileReader() { } void CHPMultiDataFileReader::Read(CHPMultiDataData& data) { data.Clear(); GenericFileReader reader; if (fileName.empty()) { fileName = data.GetFilename(); } reader.SetFilename(fileName); reader.ReadHeader(data.GetGenericData()); } affxparser/src/fusion/calvin_files/parsers/src/CHPMultiDataFileReader.h0000644000175200017520000000456314516003651027257 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPMultiDataFileReader_HEADER_ #define _CHPMultiDataFileReader_HEADER_ /*! 
\file CHPMultiDataFileReader.h This file provides methods to read a multi data CHP data file.
 */

#include "calvin_files/data/src/CHPMultiDataData.h"
#include "calvin_files/parsers/src/DataGroupReader.h"
//
// NOTE(review): the three #include directives below have no header name in
// this copy of the file; presumably the <...> names were lost in extraction.
#include
#include
#include
//

namespace affymetrix_calvin_io
{
/*! This class reads a multi data CHP data file. It is an interpreter class.*/
class CHPMultiDataFileReader
{
public:
	/*! Constructor */
	CHPMultiDataFileReader();

	/*! Destructor */
	~CHPMultiDataFileReader();

public:
	/*! Gets the name of the input file.
	 *
	 * @return The name of the file to read (returned as a copy).
	 */
	const std::string GetFilename() const { return fileName; }

	/*! Sets the name of the input file.
	 *
	 * @param name The name of the file to read.
	 */
	void SetFilename(const std::string &name) { fileName = name; }

	/*! Reads the file header of the generic file and reads all the DataPlaneHeader information.
	 *
	 * @param data A reference to a CHPMultiDataData object that will receive information from the file.
	 * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist.
	 * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match.
	 * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type.
	 */
	void Read(CHPMultiDataData& data);

protected:
	/*! Name of the file to read */
	std::string fileName;
};

}

#endif // _CHPMultiDataFileReader_HEADER_
affxparser/src/fusion/calvin_files/parsers/src/CHPQuantificationDetectionFileReader.cpp0000644000175200017520000000307714516003651032542 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2006 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/CHPQuantificationDetectionFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Constructor */ CHPQuantificationDetectionFileReader::CHPQuantificationDetectionFileReader() { } /* * Destructor */ CHPQuantificationDetectionFileReader::~CHPQuantificationDetectionFileReader() { } void CHPQuantificationDetectionFileReader::Read(CHPQuantificationDetectionData& data) { data.Clear(); GenericFileReader reader; if (fileName.empty()) { fileName = data.GetFilename(); } reader.SetFilename(fileName); reader.ReadHeader(data.GetGenericData()); } affxparser/src/fusion/calvin_files/parsers/src/CHPQuantificationDetectionFileReader.h0000644000175200017520000000477714516003651032217 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPQuantificationDetectionFileReader_HEADER_ #define _CHPQuantificationDetectionFileReader_HEADER_ /*! \file CHPQuantificationDetectionFileReader.h This file provides methods to read a quantification/detection CHP data file. */ #include "calvin_files/data/src/CHPQuantificationDetectionData.h" #include "calvin_files/parsers/src/DataGroupReader.h" // #include #include #include // namespace affymetrix_calvin_io { /*! This class reads a quantification/detection CHP data file. It is an interpreter class.*/ class CHPQuantificationDetectionFileReader { public: /*! Constructor */ CHPQuantificationDetectionFileReader(); /*! Destructor */ ~CHPQuantificationDetectionFileReader(); public: /*! Gets the name of the input file. * * @return The name of the file to read. */ const std::string GetFilename() const { return fileName; } /*! Sets the name of the input file. * * @param name The name of the file to read. */ void SetFilename(const std::string &name) { fileName = name; } /*! Reads the file header of the generic file and reads all the DataPlaneHeader information. * If no file name has been set on this reader, the file name held by data is used (and kept in this reader). * * @param data A reference to a CHPQuantificationDetectionData object that will receive information from the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Read(CHPQuantificationDetectionData& data); protected: /*!
Name of the file to read */ std::string fileName; }; } #endif // _CHPQuantificationDetectionFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/CHPQuantificationFileReader.cpp0000644000175200017520000000300014516003651030665 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/CHPQuantificationFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Constructor */ CHPQuantificationFileReader::CHPQuantificationFileReader() { } /* * Destructor */ CHPQuantificationFileReader::~CHPQuantificationFileReader() { } void CHPQuantificationFileReader::Read(CHPQuantificationData& data) { data.Clear(); GenericFileReader reader; if (fileName.empty()) { fileName = data.GetFilename(); } reader.SetFilename(fileName); reader.ReadHeader(data.GetGenericData()); } affxparser/src/fusion/calvin_files/parsers/src/CHPQuantificationFileReader.h0000644000175200017520000000463414516003651030350 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // 
Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPQuantificationFileReader_HEADER_ #define _CHPQuantificationFileReader_HEADER_ /*! \file CHPQuantificationFileReader.h This file provides methods to read a Signal CHP data file. */ #include "calvin_files/data/src/CHPQuantificationData.h" #include "calvin_files/parsers/src/DataGroupReader.h" // #include #include #include // namespace affymetrix_calvin_io { /*! This class reads a Signal CHP data file. It is an interpreter class.*/ class CHPQuantificationFileReader { public: /*! Constructor */ CHPQuantificationFileReader(); /*! Destructor */ ~CHPQuantificationFileReader(); public: /*! Gets the name of the input file. * * @return The name of the file to read. */ const std::string GetFilename() const { return fileName; } /*! Sets the name of the input file. * * @param name The name of the file to read. */ void SetFilename(const std::string &name) { fileName = name; } /*! Reads the file header of the generic file and reads all the DataPlaneHeader information. * * @param data A reference to a GenericData object that will receive information from the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. 
* @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Read(CHPQuantificationData& data); protected: /*! Name of the file to read */ std::string fileName; }; } #endif // _CHPQuantificationFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/CHPTilingFileReader.cpp0000644000175200017520000000271014516003651027144 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/CHPTilingFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Constructor */ CHPTilingFileReader::CHPTilingFileReader() { } /* * Destructor */ CHPTilingFileReader::~CHPTilingFileReader() { } void CHPTilingFileReader::Read(CHPTilingData& data) { data.Clear(); GenericFileReader reader; if (fileName.empty()) { fileName = data.GetFilename(); } reader.SetFilename(fileName); reader.ReadHeader(data.GetGenericData()); } affxparser/src/fusion/calvin_files/parsers/src/CHPTilingFileReader.h0000644000175200017520000000452414516003651026616 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPTilingFileReader_HEADER_ #define _CHPTilingFileReader_HEADER_ /*! 
\file CHPTilingFileReader.h This file provides methods to read a tiling CHP data file. */ #include "calvin_files/data/src/CHPTilingData.h" #include "calvin_files/parsers/src/DataGroupReader.h" // #include #include #include // namespace affymetrix_calvin_io { /*! This class reads a tiling CHP data file. It is an interpreter class.*/ class CHPTilingFileReader { public: /*! Constructor */ CHPTilingFileReader(); /*! Destructor */ ~CHPTilingFileReader(); public: /*! Gets the name of the input file. * * @return The name of the file to read. */ const std::string GetFilename() const { return fileName; } /*! Sets the name of the input file. * * @param name The name of the file to read. */ void SetFilename(const std::string &name) { fileName = name; } /*! Reads the file header of the generic file and reads all the DataPlaneHeader information. * * @param data A reference to a GenericData object that will receive information from the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Read(CHPTilingData& data); protected: /*! Name of the file to read */ std::string fileName; }; } #endif // _CHPTilingFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/CelFileReader.cpp0000644000175200017520000000250714516003651026072 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/CelFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; /* * Constructor */ CelFileReader::CelFileReader() { } /* * Destructor */ CelFileReader::~CelFileReader() { } /* */ void CelFileReader::Read(CelFileData& data) { data.Clear(); GenericFileReader reader; if (fileName.empty()) fileName = data.GetFilename(); reader.SetFilename(fileName); reader.ReadHeader(data.GetGenericData()); } affxparser/src/fusion/calvin_files/parsers/src/CelFileReader.h0000644000175200017520000000477514516003651025550 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CelFileReader_HEADER_ #define _CelFileReader_HEADER_ /*! \file CelFileReader.h This file provides methods to read a CEL data file. */ #include "calvin_files/data/src/CELData.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads a CEL data file. It is an interpreter class.*/ class CelFileReader { public: /*! Constructor */ CelFileReader(); /*! Destructor */ ~CelFileReader(); public: /*! Gets the name of the input file. * * @return The name of the file to read. */ const std::string GetFilename() const { return fileName; } /*! Sets the name of the input file. * * @param name The name of the file to read. */ void SetFilename(const std::string &name) { fileName = name; } /*! Reads the file header of the generic file and reads all the DataSetHeader information. * * @param data A reference to a GenericData object that will receive information from the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Read(CelFileData& data); protected: /*! 
Name of the file to read */ std::string fileName; }; } #endif // _CelFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/DATFileReader.cpp0000644000175200017520000000663114516003651026001 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/DATFileReader.h" // #include "calvin_files/parsers/src/GenericFileReader.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Constructor */ DATFileReader::DATFileReader() { } /* * Destructor */ DATFileReader::~DATFileReader() { } /* */ void DATFileReader::Read(DATData& data) { data.Clear(); GenericFileReader reader; if (fileName.empty()) { fileName = data.GetFilename(); } reader.SetFilename(fileName); reader.Open(data.GetGenericData()); DataGroupReader dcReader = reader.GetDataGroupReader(0); ReadGlobalGrid(data, dcReader); ReadSubgrids(data, dcReader); reader.Close(); } /* * Read the global grid */ void DATFileReader::ReadGlobalGrid(DATData& data, DataGroupReader& dcReader) { try { FRegion rgn; DataSetReader dpReader = dcReader.GetDataSetReader(DAT_GLOBAL_GRID); int32_t cols = 
dpReader.GetDataSetHeader().GetColumnCnt(); u_int32_t gridStatus = 0; if (cols > 0) { dpReader.Read(gridStatus); } // coordinates are in floats, assume there is only one row FPoint value; for (int32_t i = 1; i < cols; i+=2) { dpReader.Read(value.x); dpReader.Read(value.y); rgn.pts.push_back(value); } data.SetGlobalGrid(gridStatus, rgn); ReadGridAlignmentAlgorithmParameters(data, dpReader.GetDataSetHeader()); } catch(DataSetNotFoundException& e) { // ignore: without the global grid DataSet nothing is set on data } } /* * Read subgrids: every row of the DAT_SUBGRID DataSet becomes one subgrid (status value first, then x/y pairs). A missing DataSet is silently ignored. */ void DATFileReader::ReadSubgrids(DATData& data, DataGroupReader& dcReader) { try { FRegion rgn; DataSetReader dpReader = dcReader.GetDataSetReader(DAT_SUBGRID); int32_t rows = dpReader.GetDataSetHeader().GetRowCnt(); int32_t cols = dpReader.GetDataSetHeader().GetColumnCnt(); // coordinates are in floats; each row holds one subgrid FPoint value; for (int32_t n = 0; n < rows; n++) { u_int32_t gridStatus = 0; if (cols > 0) { dpReader.Read(gridStatus); } for (int32_t i = 1; i < cols; i+=2) { dpReader.Read(value.x); dpReader.Read(value.y); rgn.pts.push_back(value); } data.AddSubgrid(gridStatus, rgn); rgn.Clear(); } } catch(DataSetNotFoundException& e) { // ignore: without the subgrid DataSet no subgrids are added } } /* * Read the grid alignment algorithm parameters: clears the parameters already held by data, then copies every name/value parameter of the DataSetHeader into it. */ void DATFileReader::ReadGridAlignmentAlgorithmParameters(DATData& data, const DataSetHeader& dsh) { data.ClearGridAlignmentAlgorithmParameters(); ParameterNameValueTypeConstIt begin, end; dsh.GetNameValIterators(begin, end); for (ParameterNameValueTypeConstIt ii = begin; ii != end; ++ii) { data.AddGridAlignmentAlgorithmParameter(*ii); } } affxparser/src/fusion/calvin_files/parsers/src/DATFileReader.h0000644000175200017520000000566714516003651025456 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DATFileReader_HEADER_ #define _DATFileReader_HEADER_ /*! \file DATFileReader.h This file provides methods to read a DAT data file. */ //#include "FileException.h" #include "calvin_files/data/src/DATData.h" #include "calvin_files/parsers/src/DataGroupReader.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads a DAT data file. It is an interpreter class.*/ class DATFileReader { public: /*! Constructor */ DATFileReader(); /*! Destructor */ ~DATFileReader(); public: /*! Gets the name of the input file. * * @return The name of the file to read. */ const std::string GetFilename() const { return fileName; } /*! Sets the name of the input file. * * @param name The name of the file to read. */ void SetFilename(const std::string &name) { fileName = name; } /*! Reads the file header of the generic file and reads all the DataSetHeader information. * * @param data A reference to a DATData object that will receive information from the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. 
* @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Read(DATData& data); protected: /*! UNTESTED - Read the global grid */ void ReadGlobalGrid(DATData& data, DataGroupReader& dcReader); /*! UNTESTED - Read the subgrids */ void ReadSubgrids(DATData& data, DataGroupReader& dcReader); /*! Read the grid alignment algorithm parameters. * @param data DATData object to which to add the grid alignment algorithm parameters. * @param dsh DataSetHeader from which to read the grid alignment algorithm parameters. */ void ReadGridAlignmentAlgorithmParameters(DATData& data, const DataSetHeader& dsh); protected: /*! Name of the file to read */ std::string fileName; }; } #endif // _DATFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/DataGroupHeaderReader.cpp0000644000175200017520000001175614516003651027574 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/DataGroupHeaderReader.h" // #include "calvin_files/parsers/src/DataSetHeaderReader.h" #include "calvin_files/parsers/src/FileInput.h" // #include #include // using namespace affymetrix_calvin_io; /* * Constructor */ DataGroupHeaderReader::DataGroupHeaderReader() { } /* * Reads all the DataGroupHeaders in a file and the minimum information for each DataSetHeader in every DataGroup. */ void DataGroupHeaderReader::ReadAllMinimumInfo(std::ifstream& fileStream, FileHeader& fh, u_int32_t dataGroupCnt) { // Get the first data group offset u_int32_t nextDataGroupFilePos = fh.GetFirstDataGroupFilePos(); for (u_int32_t i = 0; i < dataGroupCnt; ++i) { // Read the DataGroupHeader DataGroupHeader dch; // Move to the indicated position in the file fileStream.seekg(nextDataGroupFilePos, std::ios_base::beg); nextDataGroupFilePos = ReadMinimumInfo(fileStream, dch); fh.AddDataGroupHdr(dch); } } /* * Reads all the DataGroupHeaders in a file and all information for each DataSetHeader in every DataGroup. */ void DataGroupHeaderReader::ReadAll(std::ifstream& fileStream, FileHeader& fh, u_int32_t dataGroupCnt) { // Get the first data group offset u_int32_t nextDataGroupFilePos = fh.GetFirstDataGroupFilePos(); for (u_int32_t i = 0; i < dataGroupCnt; ++i) { // Read the DataGroupHeader DataGroupHeader dch; // Move to the indicated position in the file fileStream.seekg(nextDataGroupFilePos, std::ios_base::beg); nextDataGroupFilePos = Read(fileStream, dch); fh.AddDataGroupHdr(dch); } } /* * Reads the DataGroupHeader and the minimum information for all DataSetHeaders associated with this DataGroupHeader from the file. 
*/ u_int32_t DataGroupHeaderReader::ReadMinimumInfo(std::ifstream& fileStream, DataGroupHeader& grpHdr) { ReadDataGroupStartFilePos(fileStream, grpHdr); u_int32_t dataSetCnt = ReadHeader(fileStream, grpHdr); // Read the DataSets DataSetHeaderReader dphReader; dphReader.ReadAllMinimumInfo(fileStream, grpHdr, dataSetCnt); return grpHdr.GetNextGroupPos(); } /* * Read the DataGroupHeader and all DataSetHeaders associated with this DataGroupHeader from the file. */ u_int32_t DataGroupHeaderReader::Read(std::ifstream& fileStream, DataGroupHeader& grpHdr) { ReadDataGroupStartFilePos(fileStream, grpHdr); u_int32_t dataSetCnt = ReadHeader(fileStream, grpHdr); // Read the DataSets DataSetHeaderReader dphReader; dphReader.ReadAll(fileStream, grpHdr, dataSetCnt); return grpHdr.GetNextGroupPos(); } /* * Reads the DataGroupHeader from the file. Doesn't read all DataSetHeader information. */ u_int32_t DataGroupHeaderReader::ReadHeader(std::ifstream& fileStream, DataGroupHeader& dch) { ReadNextDataGroupFilePos(fileStream, dch); ReadFirstDataSetFilePos(fileStream, dch); u_int32_t dataSetCnt = ReadDataSetCnt(fileStream, dch); ReadDataGroupName(fileStream, dch); return dataSetCnt; } /* * Read the file position of the start of the DataSet. */ void DataGroupHeaderReader::ReadDataGroupStartFilePos(std::ifstream& fileStream, DataGroupHeader& grpHdr) { grpHdr.SetHeaderStartFilePos(fileStream.tellg()); } /* * Reads the file position of the next DataGroup from the file. */ void DataGroupHeaderReader::ReadNextDataGroupFilePos(std::ifstream& fileStream, DataGroupHeader& dch) { //DEBUG //u_int32_t z = fileStream.tellg(); dch.SetNextGroupPos(FileInput::ReadUInt32(fileStream)); } /* * Reads the file position of the first DataSet in the DataGroup. 
*/ void DataGroupHeaderReader::ReadFirstDataSetFilePos(std::ifstream& fileStream, DataGroupHeader& dch) { //DEBUG //u_int32_t z = fileStream.tellg(); dch.SetDataSetPos(FileInput::ReadUInt32(fileStream)); } /* * Reads the number of DataSets in the current DataGroup from the file. */ u_int32_t DataGroupHeaderReader::ReadDataSetCnt(std::ifstream& fileStream, DataGroupHeader& dch) { //DEBUG //u_int32_t z = fileStream.tellg(); return FileInput::ReadUInt32(fileStream); } /* * Reads the DataGroup name from the file. */ void DataGroupHeaderReader::ReadDataGroupName(std::ifstream& fileStream, DataGroupHeader& dch) { //DEBUG //u_int32_t z = fileStream.tellg(); dch.SetName(FileInput::ReadString16(fileStream)); } affxparser/src/fusion/calvin_files/parsers/src/DataGroupHeaderReader.h0000644000175200017520000001235614516003651027236 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataGroupHeaderReader_HEADER_ #define _DataGroupHeaderReader_HEADER_ /*! \file DataGroupHeaderReader.h This file provides methods to read the DataGroupHeaders from a file. 
*/ #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads the all the DataGroupHeader information from a file into a FileHeader object.*/ class DataGroupHeaderReader { public: /*! Constructor */ DataGroupHeaderReader(); public: /*! Reads all the DataGroupHeaders in a file and the minimum information for each DataSetHeader in every DataGroup. * @param fs Open fstream positioned at the start of the first DataGroupHeader in the file. * @param fh FileHeader object to fill. * @param dataGroupCnt Number of DataGroup in the file. */ void ReadAllMinimumInfo(std::ifstream& fs, FileHeader& fh, u_int32_t dataGroupCnt); /*! Reads all the DataGroupHeaders in a file and all information for each DataSetHeader in every DataGroup. * @param fileStream Open fstream positioned at the start of the first DataGroupHeader in the file. * @param fh FileHeader object to fill. * @param dataGroupCnt Number of DataGroup in the file. */ void ReadAll(std::ifstream& fileStream, FileHeader& fh, u_int32_t dataGroupCnt); /*! Reads the DataGroupHeader and the minimum information for all DataSetHeaders associated with this DataGroupHeader * from the file. * @param fileStream Open fstream positioned at the start of the first DataGroupHeader in the file. * @param dch DataGroupHeader object to fill. * @return The file position of the next data group */ u_int32_t ReadMinimumInfo(std::ifstream& fileStream, DataGroupHeader& dch); /*! Read the DataGroupHeader and all DataSetHeaders associated with this DataGroupHeader * from the file. * @param fileStream Open fstream positioned at the start of the first DataGroupHeader in the file. 
* @param dch DataGroupHeader object to fill. * @return The file position of the next data group */ u_int32_t Read(std::ifstream& fileStream, DataGroupHeader& dch); /*! Reads the DataGroupHeader from the file. Doesn't read all DataSetHeader information. * @param fileStream Open fstream positioned at the start of a DataGroupHeader in the file. * @param dch DataGroupHeader object to fill with the header information. * @return The number of DataSetHeaders associated with the current DataGroupHeader. */ u_int32_t ReadHeader(std::ifstream& fileStream, DataGroupHeader& dch); protected: /*! Records the current stream position as the start of the DataGroup header. * @param fileStream Open fstream positioned at the start of the DataGroupHeader. * @param grpHdr Reference to the DataGroupHeader object to fill. */ void ReadDataGroupStartFilePos(std::ifstream& fileStream, DataGroupHeader& grpHdr); /*! Reads the file position of the next DataGroup. * @param fileStream Open fstream positioned at the start of the file position of a DataGroupHeader. * @param dch DataGroupHeader object in which to write the file position. */ void ReadNextDataGroupFilePos(std::ifstream& fileStream, DataGroupHeader& dch); /*! Reads the file position of the first DataSet associated with the current DataGroup. * @param fileStream Open fstream positioned at the start of the file position of the first DataSetHeader in the DataGroup. * @param dch DataGroupHeader object to which to add the DataSetHeader information. */ void ReadFirstDataSetFilePos(std::ifstream& fileStream, DataGroupHeader& dch); /*! Reads the number of DataSets associated with the current DataGroup. * @param fileStream Open fstream positioned at the start of the DataSet count. * @param dch DataGroupHeader being read; the implementation does not store the count in it. * @return The number of DataSets read from the file. */ u_int32_t ReadDataSetCnt(std::ifstream& fileStream, DataGroupHeader& dch); /*! Reads the DataGroup name. * @param fileStream Open fstream positioned at the start of the DataGroupHeader name.
* @param dch DataGroupHeader object to which to add the DataGroup name. */ void ReadDataGroupName(std::ifstream& fileStream, DataGroupHeader& dch); protected: }; } #endif // _DataGroupHeaderReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/DataGroupReader.cpp0000644000175200017520000000477114516003651026462 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/DataGroupReader.h" // #include "calvin_files/data/src/DataException.h" #include "calvin_files/parsers/src/DataSetReader.h" #include "calvin_files/parsers/src/FileException.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_exceptions; DataGroupReader::DataGroupReader(std::ifstream& is, DataGroupHeader& hdr) : fileStream(is), dataGroupHdr(hdr) { } DataGroupReader::~DataGroupReader() { } /* * Get the number of DataSets in the DataGroup */ int32_t DataGroupReader::GetDataSetCnt() const { return dataGroupHdr.GetDataSetCnt(); } /* * Get the name of the DataGroup */ std::wstring DataGroupReader::GetDataGroupName() const { return dataGroupHdr.GetName(); } /* * Get the DataSetReader by index */ DataSetReader 
DataGroupReader::GetDataSetReader(int32_t index)
{
    /* Indices are zero-based, so the valid range is 0 .. GetDataSetCnt()-1.
     * index == GetDataSetCnt() is out of range as well and must be rejected
     * here rather than being handed to GetDataSet(). */
    if (index < 0 || index >= dataGroupHdr.GetDataSetCnt())
    {
        DataSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
        throw e;
    }

    /* Bind a reader to the shared file stream and the selected header. */
    DataSetHeader& dph = dataGroupHdr.GetDataSet(index);
    DataSetReader dpReader(fileStream, dph);
    return dpReader;
}

/*
 * Get the DataSetReader by name
 *
 * Looks the name up with DataGroupHeader::FindDataSetHeader(); a null result
 * is reported as DataSetNotFoundException. Otherwise a DataSetReader bound to
 * the shared file stream and the matching header is returned by value.
 */
DataSetReader DataGroupReader::GetDataSetReader(const std::wstring& name)
{
    DataSetHeader* dph = dataGroupHdr.FindDataSetHeader(name);
    if (dph == 0)
    {
        DataSetNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
        throw e;
    }
    DataSetReader dpReader(fileStream, *dph);
    return dpReader;
}
affxparser/src/fusion/calvin_files/parsers/src/DataGroupReader.h0000644000175200017520000000506014516003651026117 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataGroupReader_HEADER_ #define _DataGroupReader_HEADER_ /*!
\file DataGroupReader.h This file provides methods to read the data of a DataGroup. */ #include "calvin_files/data/src/DataGroupHeader.h" #include "calvin_files/parsers/src/DataSetReader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! Class to read a DataGroup */ class DataGroupReader { public: /*! Constructor * @param o Reference to an open ifstream * @param hdr Reference to a DataGroupHeader that has been initialized */ DataGroupReader(std::ifstream& o, DataGroupHeader& hdr); /*! Destructor */ ~DataGroupReader(); /*! Get the number of DataSets in the DataGroup * @return DataSet count */ int32_t GetDataSetCnt() const; /*! Get the name of the DataGroup * @return DataGroup name */ std::wstring GetDataGroupName() const; /*! Get the DataSetReader by index * @param index Index of the DataSet * @exception DataSetNotFoundException */ DataSetReader GetDataSetReader(int32_t index); /*! Get the DataSetReader by name * @param name Name of the DataSet * @exception DataSetNotFoundException */ DataSetReader GetDataSetReader(const std::wstring& name); private: /*! Open file stream */ std::ifstream& fileStream; /*! Data dataGroup header that has been initialized */ DataGroupHeader& dataGroupHdr; }; /*! typedef of a vector of DataGroupHeaderReaders */ typedef std::vector DataGroupReaderVector; /*! typedef of a constant iterator of DataGroupHeaderReaders */ typedef std::vector::iterator DataGroupReaderIt; }; #endif // _DataGroupReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/DataSetHeaderReader.cpp0000644000175200017520000001301314516003651027217 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/DataSetHeaderReader.h" // #include "calvin_files/data/src/DataGroupHeader.h" #include "calvin_files/parsers/src/FileInput.h" // #include #include // using namespace affymetrix_calvin_io; /* * Constructor */ DataSetHeaderReader::DataSetHeaderReader() { } /* * Read the names and file offsets for all DataSets associated with a DataGroup. */ void DataSetHeaderReader::ReadAllMinimumInfo(std::ifstream& fileStream, DataGroupHeader& dch, u_int32_t dataSetCnt) { // Get the first dataSet offset u_int32_t nextDataSetFilePos = dch.GetDataSetPos(); for (u_int32_t i = 0; i < dataSetCnt; ++i) { DataSetHeader dph; // Move to the indicated position in the file fileStream.seekg(nextDataSetFilePos, std::ios_base::beg); nextDataSetFilePos = ReadMinimumInfo(fileStream, dph); // Add the DataSetHeader to the file header dch.AddDataSetHdr(dph); } } /* * Read the complete information for all DataSetHeaders associated with a DataGroup. 
*/ void DataSetHeaderReader::ReadAll(std::ifstream& fileStream, DataGroupHeader& dch, u_int32_t dataSetCnt) { // Get the first dataSet offset u_int32_t nextDataSetFilePos = dch.GetDataSetPos(); for (u_int32_t i = 0; i < dataSetCnt; ++i) { DataSetHeader dph; // Move to the indicated position in the file fileStream.seekg(nextDataSetFilePos, std::ios_base::beg); nextDataSetFilePos = Read(fileStream, dph); // Add the DataSetHeader to the file header dch.AddDataSetHdr(dph); } } /* * Reads the minimum DataSetHeader information. */ u_int32_t DataSetHeaderReader::ReadMinimumInfo(std::ifstream& fileStream, DataSetHeader& dsh) { ReadDataSetStartFilePos(fileStream, dsh); ReadDataFilePos(fileStream, dsh); u_int32_t nextDataSetFilePos = ReadNextDataSetFilePos(fileStream, dsh); ReadName(fileStream, dsh); return nextDataSetFilePos; } /* * Reads the complete DataSetHeader information. */ u_int32_t DataSetHeaderReader::Read(std::ifstream& fileStream, DataSetHeader& dsh) { ReadDataSetStartFilePos(fileStream, dsh); ReadDataFilePos(fileStream, dsh); u_int32_t nextDataSetFilePos = ReadNextDataSetFilePos(fileStream, dsh); ReadName(fileStream, dsh); ReadParameters(fileStream, dsh); ReadColumns(fileStream, dsh); ReadRowCount(fileStream, dsh); // dph.SetDataStartFilePos(fileStream.tellg()); // set the offset to the start of the data return nextDataSetFilePos; } /* * Read the file position of the start of the DataSet. */ void DataSetHeaderReader::ReadDataSetStartFilePos(std::ifstream& fileStream, DataSetHeader& dsh) { dsh.SetHeaderStartFilePos(fileStream.tellg()); } /* * Read the file position to the start of the data. */ void DataSetHeaderReader::ReadDataFilePos(std::ifstream& fileStream, DataSetHeader& dsh) { dsh.SetDataStartFilePos(FileInput::ReadUInt32(fileStream)); } /* * Read the file position to the next DataSet. 
*/ u_int32_t DataSetHeaderReader::ReadNextDataSetFilePos(std::ifstream& fileStream, DataSetHeader& dsh) { u_int32_t nextDataSetFilePos = FileInput::ReadUInt32(fileStream); dsh.SetNextSetFilePos(nextDataSetFilePos); return nextDataSetFilePos; } /* * Read the DataSetHeader name. */ void DataSetHeaderReader::ReadName(std::ifstream& fileStream, DataSetHeader& dsh) { std::wstring name = FileInput::ReadString16(fileStream); dsh.SetName(name); } /* * Read the parameter list (name-value-type). */ void DataSetHeaderReader::ReadParameters(std::ifstream& fileStream, DataSetHeader& dsh) { u_int32_t params = FileInput::ReadUInt32(fileStream); for (u_int32_t iparam = 0; iparam < params; ++iparam) { const void* mimeValue = 0; std::wstring paramName = FileInput::ReadString16(fileStream); int32_t mimeSize = FileInput::ReadBlob(fileStream, mimeValue); std::wstring paramType = FileInput::ReadString16(fileStream); ParameterNameValueType nvt(paramName, (void*)mimeValue, mimeSize, paramType); // deleting 'const void*' is undefined, cast it to a char* delete[] (char*)mimeValue; dsh.AddNameValParam(nvt); } } /* * Read column information. */ void DataSetHeaderReader::ReadColumns(std::ifstream& fileStream, DataSetHeader& dsh) { // Read the number of columns u_int32_t columns = FileInput::ReadUInt32(fileStream); for (u_int32_t icol = 0; icol < columns; ++icol) { // Read the name std::wstring name = FileInput::ReadString16(fileStream); // Read the type int8_t type = FileInput::ReadInt8(fileStream); // Read the size int32_t size = FileInput::ReadInt32(fileStream); dsh.AddColumn(ColumnInfo(name, (DataSetColumnTypes)type, size)); } } /* * Read the number of rows. 
*/ void DataSetHeaderReader::ReadRowCount(std::ifstream& fileStream, DataSetHeader& dsh) { int32_t numRows = FileInput::ReadInt32(fileStream); dsh.SetRowCnt(numRows); } affxparser/src/fusion/calvin_files/parsers/src/DataSetHeaderReader.h0000644000175200017520000001210214516003651026662 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataSetHeaderReader_HEADER_ #define _DataSetHeaderReader_HEADER_ /*! \file DataSetHeaderReader.h This file provides methods to read the DataSetHeader from a file. */ #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads the all DataSetHeader information associated with a DataGroup from a file. */ class DataSetHeaderReader { public: /*! Constructor */ DataSetHeaderReader(); public: /*! 
Reads the minimum DataSetHeader information for all DataSets associated with a DataGroup. * @param fileStream Open fstream positioned at the start of the first DataSetHeader in a DataGroup. * @param dch DataGroupHeader object to which to add the DataSetHeader information. * @param dataSetCnt Number of DataSets in the DataGroup. */ void ReadAllMinimumInfo(std::ifstream& fileStream, DataGroupHeader& dch, u_int32_t dataSetCnt); /*! Reads the complete DataSetHeader information for all DataSets associated with a DataGroup. * @param fileStream Open fstream positioned at the start of the first DataSetHeader in a DataGroup. * @param dch DataGroupHeader object to which to add the DataSetHeader information. * @param dataSetCnt Number of DataSets in the DataGroup. */ void ReadAll(std::ifstream& fileStream, DataGroupHeader& dch, u_int32_t dataSetCnt); /*! Reads the minimum DataSetHeader information. * @param fileStream Open fstream positioned at the start of the DataSetHeader. * @param dsh Reference to the DataSetHeader object to fill. * @return The file position of the next DataSet. */ u_int32_t ReadMinimumInfo(std::ifstream& fileStream, DataSetHeader& dsh); /*! Reads the complete DataSetHeader information. * @param fileStream Open fstream positioned at the start of the DataSetHeader. * @param dsh Reference to the DataSetHeader object to fill. * @return The file position of the next DataSet. */ u_int32_t Read(std::ifstream& fileStream, DataSetHeader& dsh); protected: /*! Read the file position of the start of the DataSet. * @param fileStream Open fstream positioned at the start of the DataSetHeader. * @param dsh Reference to the DataSetHeader object to fill. */ void ReadDataSetStartFilePos(std::ifstream& fileStream, DataSetHeader& dsh); /*! Read the file position to the start of the data. * @param fileStream Open fstream positioned at the start of the data file position. * @param dsh Reference to the DataSetHeader object to fill. 
*/ void ReadDataFilePos(std::ifstream& fileStream, DataSetHeader& dsh); /*! Read the file position to the next DataSet. * @param fileStream Open fstream positioned at the start of the next DataSet file position. * @param dsh Reference to the DataSetHeader object to fill. * @return The file position of the next data set. */ u_int32_t ReadNextDataSetFilePos(std::ifstream& fileStream, DataSetHeader& dsh); /*! Read the DataSetHeader name. * @param fileStream Open fstream positioned at the start of the DataSetHeader name. * @param dsh Reference to the DataSetHeader object to fill. */ void ReadName(std::ifstream& fileStream, DataSetHeader& dsh); /*! Read the parameter list (name-value-type). * @param fileStream Open fstream positioned at the start of the DataSetHeader parameter list count. * @param dsh Reference to the DataSetHeader object to fill. */ void ReadParameters(std::ifstream& fileStream, DataSetHeader& dsh); /*! Read column information. * @param fileStream Open fstream positioned at the start of the DataSetHeader column count. * @param dsh Reference to the DataSetHeader object to fill. */ void ReadColumns(std::ifstream& fileStream, DataSetHeader& dsh); /*! Read the number of rows. * @param fileStream Open fstream positioned at the start of the DataSetHeader row count. * @param dsh Reference to the DataSetHeader object to fill. */ void ReadRowCount(std::ifstream& fileStream, DataSetHeader& dsh); }; } #endif // _DataSetHeaderReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/DataSetReader.cpp0000644000175200017520000000610514516003651026112 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/DataSetReader.h" // #include "calvin_files/parsers/src/FileInput.h" // using namespace affymetrix_calvin_io; /* * Constructor */ DataSetReader::DataSetReader(std::ifstream& is, DataSetHeader& dph) : fileStream(is), dataSetHdr(dph) { // Position the file stream to the start of the DataSet fileStream.seekg(dataSetHdr.GetDataStartFilePos()); } /* * Destructor */ DataSetReader::~DataSetReader() { } /* * Get the DataSet name */ std::wstring DataSetReader::GetDataSetName() const { return dataSetHdr.GetName(); } /* * Read int8_t from the DataSet. File stream position is incremented */ int32_t DataSetReader::ReadBuffer(char* buffer, int32_t count) { fileStream.read(buffer, count); return fileStream.gcount(); } /* * Read int8_t from the DataSet. File stream position is incremented */ void DataSetReader::Read(int8_t& value) { value = FileInput::ReadInt8(fileStream); } /* * Read u_int8_t from the DataSet. File stream position is incremented */ void DataSetReader::Read(u_int8_t& value) { value = FileInput::ReadUInt8(fileStream); } /* * Read int16_t from the DataSet. File stream position is incremented */ void DataSetReader::Read(int16_t& value) { value = FileInput::ReadInt16(fileStream); } /* * Read u_int16_t from the DataSet. File stream position is incremented */ void DataSetReader::Read(u_int16_t& value) { value = FileInput::ReadUInt16(fileStream); } /* * Read int32_t from the DataSet. 
File stream position is incremented */ void DataSetReader::Read(int32_t& value) { value = FileInput::ReadInt32(fileStream); } /* * Read u_int32_t from the DataSet. File stream position is incremented */ void DataSetReader::Read(u_int32_t& value) { value = FileInput::ReadUInt32(fileStream); } /* * Read float from the DataSet. File stream position is incremented */ void DataSetReader::Read(float& value) { value = FileInput::ReadFloat(fileStream); } /* * Read string from the DataSet. File stream position is incremented */ void DataSetReader::Read(std::string& value) { value = FileInput::ReadString8(fileStream); } /* * Read wstring from the DataSet. File stream position is incremented */ void DataSetReader::Read(std::wstring& value) { value = FileInput::ReadString16(fileStream); } affxparser/src/fusion/calvin_files/parsers/src/DataSetReader.h0000644000175200017520000000747514516003651025572 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataSetReader_HEADER_ #define _DataSetReader_HEADER_ /*! \file DataSetReader.h This file provides methods to read the data of a DataSet. 
*/ #include "calvin_files/data/src/DataSetHeader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! Class to read a DataSet */ class DataSetReader { public: /*! Constructor * @param s Reference to an open ifstream * @param d Reference to a DataSetHeader that has been initialized */ DataSetReader(std::ifstream& s, DataSetHeader& d); /*! Destructor */ ~DataSetReader(); public: /*! Get the DataSet name * @return DataSet name */ std::wstring GetDataSetName() const; /*! Get the DataSetHeader * @return DataSetHeader */ const DataSetHeader& GetDataSetHeader(){ return dataSetHdr; } /*! Reads a byte buffer from the DataSet. File stream position is incremented * @param buffer Pointer to the buffer. * @param count Size of the buffer. * @return The number of bytes read. */ int32_t ReadBuffer(char* buffer, int32_t count); /*! Read int8_t from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(int8_t& value); /*! Read u_int8_t from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(u_int8_t& value); /*! Read int16_t from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(int16_t& value); /*! Read u_int16_t from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(u_int16_t& value); /*! Read int32_t from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(int32_t& value); /*! Read u_int32_t from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(u_int32_t& value); /*! Read float from the DataSet. 
File stream position is incremented * @param value Variable reference to receive the value */ void Read(float& value); /*! Read string from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(std::string& value); /*! Read wstring from the DataSet. File stream position is incremented * @param value Variable reference to receive the value */ void Read(std::wstring& value); private: /*! Open file stream */ std::ifstream& fileStream; /*! Data dataSet header that has been initialized */ DataSetHeader& dataSetHdr; }; /*! typedef of a vector of DataSetReaders */ typedef std::vector DataSetReaderVector; /*! typedef of a constant iterator of DataSetReaders */ typedef std::vector::iterator DataSetReaderIt; } #endif // _DataSetReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/FileException.cpp0000644000175200017520000000330114516003651026173 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/FileException.h" // namespace affymetrix_calvin_exceptions { const std::wstring FileNotFoundException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::FileNotFoundException thrown."; } const std::wstring InvalidVersionException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::InvalidVersionException thrown."; } const std::wstring InvalidFileTypeException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::InvalidFileTypeException thrown."; } const std::wstring UnableToOpenFileException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::UnableToOpenFileException thrown."; } const std::wstring FileNotOpenException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::FileNotOpenException thrown."; } } affxparser/src/fusion/calvin_files/parsers/src/FileException.h0000644000175200017520000000626114516003651025650 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FileException_HEADER_ #define _FileException_HEADER_ /*! \file FileException.h This file provides class definition for the file exceptions. */ #include "calvin_files/exception/src/ExceptionBase.h" // namespace affymetrix_calvin_exceptions { class FileNotFoundException : public CalvinException { public: FileNotFoundException() : CalvinException() {} FileNotFoundException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class InvalidVersionException : public CalvinException { public: InvalidVersionException() : CalvinException() {} InvalidVersionException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class InvalidFileTypeException : public CalvinException { public: InvalidFileTypeException() : CalvinException() {} InvalidFileTypeException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class UnableToOpenFileException : public CalvinException { public: UnableToOpenFileException() : CalvinException() {} UnableToOpenFileException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t 
_LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class FileNotOpenException : public CalvinException { public: FileNotOpenException() : CalvinException() {} FileNotOpenException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; } #endif // _FileException_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/FileHeaderReader.cpp0000644000175200017520000000636514516003651026565 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/FileHeaderReader.h" // #include "calvin_files/parsers/src/FileInput.h" #include "calvin_files/parsers/src/GenericDataHeaderReader.h" // #include #include // using namespace affymetrix_calvin_io; /*! The expected magic number */ #define DATA_FILE_MAGIC_NUMBER 59 /*! 
The expected version number */ #define DATA_FILE_VERSION_NUMBER 1 /* * Constructor */ FileHeaderReader::FileHeaderReader( std::ifstream& fs, FileHeader &fh) : fileStream(fs), header(fh) { dataGroupCnt = 0; firstDataGroupFilePos = 0; } /* * Read the FileHeader from the file. */ void FileHeaderReader::Read() { ReadMagicNumber(); ReadVersion(); ReadDataGroupCnt(); ReadFirstDataGroupFilePos(); ReadGenericDataHdr(); } /* * Reads the file magic number from the file. */ void FileHeaderReader::ReadMagicNumber() { // Read magic number u_int8_t fileMagicNumber = FileInput::ReadInt8(fileStream); if (fileMagicNumber != DATA_FILE_MAGIC_NUMBER) { affymetrix_calvin_exceptions::InvalidFileTypeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Reads the generic file version number from the file. */ void FileHeaderReader::ReadVersion() { // Read generic data file version u_int8_t fileVersionNumber = FileInput::ReadInt8(fileStream); if (fileVersionNumber != DATA_FILE_VERSION_NUMBER) { affymetrix_calvin_exceptions::InvalidVersionException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Reads the DataGroup count from the file. */ void FileHeaderReader::ReadDataGroupCnt() { dataGroupCnt = FileInput::ReadUInt32(fileStream); header.SetNumDataGroups(dataGroupCnt); } /* * Reads the file position of the first DataGroup. */ void FileHeaderReader::ReadFirstDataGroupFilePos() { firstDataGroupFilePos = FileInput::ReadUInt32(fileStream); header.SetFirstDataGroupFilePos(firstDataGroupFilePos); } /* * Reads the GenericDataHeader from the file. 
*/ void FileHeaderReader::ReadGenericDataHdr() { // Read all the GenericDataHeader GenericDataHeader gdh; GenericDataHeaderReader gdhReader(fileStream); gdhReader.Read(gdh); // Set the GenericDataHeader in the FileHeader header.SetGenericDataHdr(gdh); } affxparser/src/fusion/calvin_files/parsers/src/FileHeaderReader.h0000644000175200017520000000620514516003651026223 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FileHeaderReader_HEADER_ #define _FileHeaderReader_HEADER_ /*! \file FileHeaderReader.h This file provides methods to read a generic file header. */ #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads the FileHeader of the generic data file. */ class FileHeaderReader { public: /*! Constructor * @param fs Open fstream positioned at the start of the file. 
* @param fh FileHeader object to fill. */ FileHeaderReader(std::ifstream& fs, FileHeader &fh); /*! Reads the FileHeader. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Read(); /*! Gets the number of DataGroups in the file. * @return Number of DataGroups. */ u_int32_t GetDataGroupCnt() const { return dataGroupCnt; } /*! Gets the file position of the first DataGroup header. */ u_int32_t GetFirstDataGroupFilePos() const { return firstDataGroupFilePos; } protected: /*! Reads the magic number from the file. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void ReadMagicNumber(); /*! Reads the generic file version number from the file. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. */ void ReadVersion(); /*! Reads the DataGroup count from the file. */ void ReadDataGroupCnt(); /*! Reads the file position of the first DataGroup. */ void ReadFirstDataGroupFilePos(); /*! Reads the GenericDataHeader from the file. */ void ReadGenericDataHdr(); protected: /*! A reference to the file stream. */ std::ifstream& fileStream; /*! FileHeader object to fill. */ FileHeader& header; /*! Number of DataGroups. */ u_int32_t dataGroupCnt; /*! Position of the first DataGroup. */ u_int32_t firstDataGroupFilePos; }; } #endif //_FileHeaderReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/FileInput.cpp0000644000175200017520000002052014516003651025336 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _MSC_VER #include "windows.h" #endif #include "calvin_files/parsers/src/FileInput.h" // #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) #else #include #include #include #endif // Some machines (sparc) dont support unaligned memory access // The mmaped files are chock full of unaligned accesses. // When AFFY_NOUNALIGNED_MEM is defined we will do the // alignment in software. // This feature can be enabled on intel for testing. // This is here to enable this feature automaticly on the sparc and ppc. // they cant do unaligned loads. #ifdef __sparc__ #define AFFY_UNALIGNED_IN_SW #endif using namespace affymetrix_calvin_io; /* * Read an 8 bit number from the file. */ int8_t FileInput::ReadInt8(std::ifstream &instr) { int8_t val = 0; instr.read((char *)&val, sizeof(val)); return val; } /* * Read a 16 bit number from the file. */ int16_t FileInput::ReadInt16(std::ifstream &instr) { int16_t val = 0; instr.read((char *)&val, sizeof(val)); val = ntohs(val); return val; } /* * Read a 32 bit number from the file. */ int32_t FileInput::ReadInt32(std::ifstream &instr) { int32_t val = 0; instr.read((char *)&val, sizeof(val)); val = ntohl(val); return val; } /* * Read an 8 bit unsigned number from the file. */ u_int8_t FileInput::ReadUInt8(std::ifstream &instr) { u_int8_t val = 0; instr.read((char *)&val, sizeof(val)); return val; } /* * Read a 16 bit unsigned number from the file. 
*/ u_int16_t FileInput::ReadUInt16(std::ifstream &instr) { u_int16_t val = 0; instr.read((char *)&val, sizeof(val)); val = ntohs(val); return val; } /* * Read a 32 bit unsigned number from the file. */ u_int32_t FileInput::ReadUInt32(std::ifstream &instr) { u_int32_t val = 0; instr.read((char *)&val, sizeof(val)); val = ntohl(val); return val; } /* * Read a 32 bit floating point value from a file. */ float FileInput::ReadFloat(std::ifstream &instr) { type_punned pun; pun.v_uint32=ReadInt32(instr); return pun.v_float; } /* * Read a string from the file stream. */ std::string FileInput::ReadString8(std::ifstream &instr) { int32_t len = FileInput::ReadInt32(instr); return FileInput::ReadString8(instr, len); } /* * Read a string from the file stream. */ std::string FileInput::ReadString8(std::ifstream &instr, int32_t len) { char *buf = new char [len+1]; instr.read(buf, len); buf[len]=0; std::string s=buf; delete[] buf; return s; } /* * Read the length (integer) then the string from the file. */ std::wstring FileInput::ReadString16(std::ifstream &instr) { int32_t len = FileInput::ReadInt32(instr); return FileInput::ReadString16(instr, len); } /* * Read a string from the file. */ std::wstring FileInput::ReadString16(std::ifstream &instr, int32_t len) { u_int16_t cvalue; wchar_t* s = new wchar_t[len+1]; s[len] = 0; for (int i=0; i #include #include // namespace affymetrix_calvin_io { class FileInput { public: /*! Reads an 8 bit integer from a big endian file. * * @param instr The input file stream. * @return The integer read from the file. */ static int8_t ReadInt8(std::ifstream &instr); /*! Reads a 16 bit integer from a big endian file. * * @param instr The input file stream. * @return The integer read from the file. */ static int16_t ReadInt16(std::ifstream &instr); /*! Reads a 32 bit integer from a big endian file. * * @param instr The input file stream. * @return The integer read from the file. */ static int32_t ReadInt32(std::ifstream &instr); /*! 
Reads an 8 bit unsigned integer from a big endian file. * * @param instr The input file stream. * @return The integer read from the file. */ static u_int8_t ReadUInt8(std::ifstream &instr); /*! Reads a 16 bit unsigned integer from a big endian file. * * @param instr The input file stream. * @return The integer read from the file. */ static u_int16_t ReadUInt16(std::ifstream &instr); /*! Reads a 32 bit unsigned integer from a big endian file. * * @param instr The input file stream. * @return The integer read from the file. */ static u_int32_t ReadUInt32(std::ifstream &instr); /*! Reads a 32 bit floating point number from a big endian file. * * @param instr The input file stream. * @return The floating point number read from the file. */ static float ReadFloat(std::ifstream &instr); /*! Reads an 8 bit integer from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The integer read from the file stream. */ static int8_t ReadInt8(char * &instr); /*! Reads a 16 bit integer from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The integer read from the file stream. */ static int16_t ReadInt16(char * &instr); /*! Reads a 32 bit integer from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The integer read from the file stream. */ static int32_t ReadInt32(char * &instr); /*! Reads an 8 bit unsigned integer from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The integer read from the file stream. */ static u_int8_t ReadUInt8(char * &instr); /*! Reads a 16 bit unsigned integer from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The integer read from the file stream. */ static u_int16_t ReadUInt16(char * &instr); /*! Reads a 32 bit unsigned integer from a big endian file stream (memory map pointer). * * @param instr The input file stream. 
* @return The integer read from the file stream. */ static u_int32_t ReadUInt32(char * &instr); /*! Reads a 32 bit floating point number from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The floating point number read from the file stream. */ static float ReadFloat(char * &instr); /*! Reads a 16 bit unicode string of fixed size from a big endian file. * * @param instr The input file stream. * @param len The length of the string. * @return The string read from the file stream. */ static std::wstring ReadString16(std::ifstream &instr, int32_t len); /*! Reads a 16 bit unicode string from a big endian file. * * @param instr The input file stream. * @return The string read from the file stream. */ static std::wstring ReadString16(std::ifstream &instr); /*! Reads an 8 bit string of fixed size from a big endian file. * * @param instr The input file stream. * @param len The length of the string. * @return The string read from the file stream. */ static std::string ReadString8(std::ifstream &instr, int32_t len); /*! Reads an 8 bit string from a big endian file. * * @param instr The input file stream. * @return The string read from the file stream. */ static std::string ReadString8(std::ifstream &instr); /*! Reads a 16 bit unicode string of fixed size from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @param len The length of the string. * @return The string read from the file stream. */ static std::wstring ReadString16(char * &instr, int32_t len); /*! Reads a 16 bit unicode string from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The string read from the file stream. */ static std::wstring ReadString16(char * &instr); /*! Reads an 8 bit string of fixed size from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @param len The length of the string. 
* @return The string read from the file stream. */ static std::string ReadString8(char * &instr, int32_t len); /*! Reads an 8 bit string from a big endian file stream (memory map pointer). * * @param instr The input file stream. * @return The string read from the file stream. */ static std::string ReadString8(char * &instr); /*! Reads a blob from a file (as is; no htonl). It is the responsibility of the caller to * convert elements of the blob to system types. * @param instr The input file stream. * @param value A returned pointer to the blob. The caller must delete[] the returned pointer. * @return The size of the blob read from the file. */ static int32_t ReadBlob(std::ifstream &instr, const void*& value); }; } #endif // _FileInput_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/GenericDataHeaderReader.cpp0000644000175200017520000000560314516003651030046 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/GenericDataHeaderReader.h" // #include "calvin_files/data/src/GenericDataHeader.h" #include "calvin_files/parsers/src/FileInput.h" // #include #include // using namespace affymetrix_calvin_io; /* * Constructor */ GenericDataHeaderReader::GenericDataHeaderReader(std::ifstream& fs) : fileStream(fs) { } /* * Reads the GenericDataHeader and all parent GenericDataHeaders. * Calls Read() with true to retain originqal intent. */ void GenericDataHeaderReader::Read(GenericDataHeader& gdh) { // retain original calling logic. Read(gdh, true); } /* * Reads the GenericDataHeader and all parent GenericDataHeaders. * In call to AddNameValParam, doUniqueAdds is passed. */ void GenericDataHeaderReader::Read(GenericDataHeader& gdh, bool doUniqueAdds) { // Read data type identifier gdh.SetFileTypeId(FileInput::ReadString8(fileStream)); // Read file identifier gdh.SetFileId(FileInput::ReadString8(fileStream)); // Read file creation time gdh.SetFileCreationTime(FileInput::ReadString16(fileStream)); // Read locale gdh.SetLocale(FileInput::ReadString16(fileStream)); // Read name value pairs u_int32_t paramCount = FileInput::ReadUInt32(fileStream); for (u_int32_t iparam = 0; iparam < paramCount; ++iparam) { const void* mimeValue = 0; std::wstring name = FileInput::ReadString16(fileStream); int32_t mimeSize = FileInput::ReadBlob(fileStream,mimeValue); std::wstring type = FileInput::ReadString16(fileStream); ParameterNameValueType nvt(name,(void*)mimeValue, mimeSize, type); // deleting a "const void*" generates a warning under gcc. // cast to a "char*" to quiet it. 
delete[] (char*)mimeValue; gdh.AddNameValParam(nvt,doUniqueAdds); } // Read number of generic data parent header u_int32_t numParents = FileInput::ReadUInt32(fileStream); // Read each parent header in turn - this needs to be recursive for (u_int32_t iparent = 0; iparent < numParents; ++iparent) { GenericDataHeader parentGDH; Read(parentGDH); gdh.AddParent(parentGDH); } } affxparser/src/fusion/calvin_files/parsers/src/GenericDataHeaderReader.h0000644000175200017520000000452214516003651027512 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericDataHeaderReader_HEADER_ #define _GenericDataHeaderReader_HEADER_ /*! \file GenericDataHeaderReader.h This file provides methods to read a generic data file header. */ #include "calvin_files/data/src/GenericDataHeader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads the GenericDataHeader from a file. */ class GenericDataHeaderReader { public: /*! Constructor * @param fs Open stream positioned at the start of the first GenericDataHeader. 
*/ GenericDataHeaderReader(std::ifstream& fs); public: /*! Read the GenericDataHeader and all parent GenericDataHeaders from the input stream. * @param gdh Reference to the GenericDataHeader object to which to add the GenericDataHeader information. */ void Read(GenericDataHeader& gdh); /*! Read the GenericDataHeader and all parent GenericDataHeaders from the input stream. * @param gdh Reference to the GenericDataHeader object to which to add the GenericDataHeader information. * @param doUniqueAdds Hint to indicate to ensure no duplicates are added to the collection. If true, the * check is made and performance is N^2, if false, no check is made and is assuming you are just reading from a file. */ void Read(GenericDataHeader& gdh, bool doUniqueAdds); protected: /*! A reference to the file stream. */ std::ifstream& fileStream; }; } #endif // _GenericDataHeaderReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/GenericFileReader.cpp0000644000175200017520000001330214516003651026736 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/GenericFileReader.h" // #include "calvin_files/exception/src/DevelopmentException.h" #include "calvin_files/parsers/src/DataGroupHeaderReader.h" #include "calvin_files/parsers/src/FileHeaderReader.h" #include "calvin_files/parsers/src/FileInput.h" // #include "util/Fs.h" // #include #include // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_exceptions; /* * Initialize the class. */ GenericFileReader::GenericFileReader() { gendata = 0; } /* * Clean up. */ GenericFileReader::~GenericFileReader() { CloseFile(); } /* * Read the file header of the generic file. */ void GenericFileReader::ReadHeader(affymetrix_calvin_io::GenericData& data, ReadHeaderOption option) { OpenFile(); switch(option) { case ReadNoDataGroupHeader: ReadFileHeaderNoDataGroupHeader(data); break; case ReadMinDataGroupHeader: ReadFileHeaderMinDP(data); break; case ReadAllHeaders: // fall through default: ReadFileHeader(data); break; } CloseFile(); } /* * Open the file stream. */ void GenericFileReader::OpenFile() { Fs::aptOpen(fileStream,fileName, std::ios::in | std::ios::binary); if (!fileStream) { affymetrix_calvin_exceptions::FileNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Closes the file stream. */ void GenericFileReader::CloseFile() { if (fileStream.is_open()) fileStream.close(); gendata = 0; } /* * Read the file header and minimize amount of information read from the DataSetHeader. 
*/ void GenericFileReader::ReadFileHeaderMinDP(affymetrix_calvin_io::GenericData& data) { // Set the file name data.Header().SetFilename(fileName); FileHeaderReader fhReader(fileStream, data.Header()); fhReader.Read(); // TODO: Remove existing DataGroupHdrs. DataGroupHeaderReader dchReader; dchReader.ReadAllMinimumInfo(fileStream, data.Header(), fhReader.GetDataGroupCnt()); } /* * Reads the file header of the generic file and reads all the DataSetHeader information. */ void GenericFileReader::ReadFileHeader(affymetrix_calvin_io::GenericData& data) { // Set the file name data.Header().SetFilename(fileName); FileHeaderReader fhReader(fileStream, data.Header()); fhReader.Read(); DataGroupHeaderReader dchReader; dchReader.ReadAll(fileStream, data.Header(), fhReader.GetDataGroupCnt()); } /* * Reads the file header of the generic file but does not read any DataGroupHeaders or DataSetHeaders. */ void GenericFileReader::ReadFileHeaderNoDataGroupHeader(GenericData& data) { // Set the file name data.Header().SetFilename(fileName); FileHeaderReader fhReader(fileStream, data.Header()); fhReader.Read(); } /* * Open the file for reading */ void GenericFileReader::Open(GenericData& data, OpenHint hint) { if (hint == All) { OpenFile(); ReadFileHeader(data); gendata = &data; } else { NotImplementedException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Gets the number of DataGroups in the file. */ u_int32_t GenericFileReader::GetDataGroupCnt() const { if (gendata) return gendata->DataGroupCnt(); return 0; } /* * Gets DataGroupReader by index. OpenHint should be All or sequential. 
*/ DataGroupReader GenericFileReader::GetDataGroupReader(int32_t index) { if (gendata==0 || fileStream.is_open() == false) { DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } DataGroupHeader* dch = gendata->FindDataGroupHeader(index); if (dch == 0) { DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } return DataGroupReader(fileStream, *dch); } /* * Gets the DataGroupReader by name. OpenHint must be All. */ DataGroupReader GenericFileReader::GetDataGroupReader(const std::wstring& name) { if (gendata==0 || fileStream.is_open() == false) { DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } DataGroupHeader* dch = gendata->FindDataGroupHeader(name); if (dch == 0) { DataGroupNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } return DataGroupReader(fileStream, *dch);} /* * Closes the file. */ void GenericFileReader::Close() { CloseFile(); } affxparser/src/fusion/calvin_files/parsers/src/GenericFileReader.h0000644000175200017520000001275414516003651026415 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericFileReader_HEADER_ #define _GenericFileReader_HEADER_ /*! \file GenericFileReader.h This file provides methods to read a generic data file. */ #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parsers/src/DataGroupReader.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class reads a generic data file. It is the top-level generic data file reader.*/ class GenericFileReader { public: /*! Constructor */ GenericFileReader(); /*! Destructor */ ~GenericFileReader(); /*! Hint used when opening a file */ enum OpenHint { All, Sequential, None }; /*! Indicates how much header information to read */ enum ReadHeaderOption { ReadAllHeaders, ReadMinDataGroupHeader, ReadNoDataGroupHeader }; public: /*! Gets the name of the input file. * * @return The name of the input file. */ const std::string GetFilename() const { return fileName; } /*! Sets the name of the input file. * * @param name The name of the input file. */ void SetFilename(const std::string &name) { fileName = name; } /*! Read the file header of the generic file. 
* * @param data A reference to a GenericData object that will receive header information from the file. * @param option Indicates how much DataGroupHeader and DataSetHeader information to read. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void ReadHeader(GenericData& data, ReadHeaderOption option=ReadAllHeaders); /*! Open the file for reading * @param data A reference to a GenericData object that will receive header information from the file. Amount of info depends on the hint. * @param hint A hint on how to open the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Open(GenericData& data, OpenHint hint=All); /*! Gets the number of DataGroups in the file. * @return The number of DataGroups in the file. */ u_int32_t GetDataGroupCnt() const; /*! Gets DataGroupReader by index. OpenHint should be All or sequential. * @param index Data dataGroup index. If opened for sequential access the index should be an increment of one from call to call. * @return A DataGroupReader to use to read the data. * @exception DataGroupNotFoundException */ DataGroupReader GetDataGroupReader(int32_t index); /*! Gets the DataGroupReader by name. OpenHint must be All. * @param name Data dataGroup name. * @return A DataGroupReader to use to read the data. * @exception DataGroupNotFoundException */ DataGroupReader GetDataGroupReader(const std::wstring& name); /*! Gets the DataGroupReader based on the file position of the DataGroup. * @param index Data dataGroup file position. 
* @return A DataGroupReader to use to read the data. */ // DataGroupReader GetDataGroupReader(u_int32_t filePosition); /*! Closes the file. */ void Close(); protected: /*! Opens the file for reading */ void OpenFile(); /*! Read the file header and minimize amount of information read from the DataSetHeaders. * It does not attempt to read the complete DataSetHeader. That is deferred * until accessed by the DataSet object. * @param data Reference to the GenericData object to fill. */ void ReadFileHeaderMinDP(GenericData& data); /*! Reads the file header of the generic file and reads all the DataSetHeader information. * @param data Reference to the GenericData object to fill. */ void ReadFileHeader(GenericData& data); /*! Reads the file header of the generic file but does not read any DataGroupHeaders or DataSetHeaders. * @param data Reference to the GenericData object to fill. */ void ReadFileHeaderNoDataGroupHeader(GenericData& data); /*! Closes the file */ void CloseFile(); protected: /*! The name of the input file. */ std::string fileName; /*! The file stream. */ std::ifstream fileStream; /*! A pointer to the GenericData object */ GenericData* gendata; }; } #endif // _GenericFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/GridControlFileReader.cpp0000644000175200017520000001033314516003651027611 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/GridControlFileReader.h" // #include "calvin_files/parameter/src/Parameter.h" #include "calvin_files/parsers/src/GenericFileReader.h" // #include "file/GridControlData.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_grid_control; using namespace affymetrix_calvin_parameter; /* * Constructor */ GridControlFileReader::GridControlFileReader() { } /* * Destructor */ GridControlFileReader::~GridControlFileReader() { } /* * Read the file contents into the data object. */ void GridControlFileReader::Read(const std::string &fileName, GridControlData& data) { // Clear the old data and read the file. data.Clear(); GenericFileReader reader; GenericData gdata; reader.SetFilename(fileName); reader.ReadHeader(gdata); // Check the file identifier if (gdata.FileIdentifier() != GRD_FILE_TYPE_IDENTIFIER) { affymetrix_calvin_exceptions::InvalidFileTypeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } // Get the header parameters. ParameterNameValueType param; gdata.Header().GetGenericDataHdr()->FindNameValParam(GRD_ROWS_PARAMETER_NAME, param); data.SetRows(param.GetValueInt32()); gdata.Header().GetGenericDataHdr()->FindNameValParam(GRD_COLUMNS_PARAMETER_NAME, param); data.SetColumns(param.GetValueInt32()); // Get the XY coordinates from the dataSets. FeatureCoordinate coord; u_int16_t x; u_int16_t y; int nRows; DataSet *dataSet; // First the B1 probes. 
dataSet = gdata.DataSet(GRD_FILE_COORDINATE_GROUP_NAME, GRD_FILE_B1_SET_NAME); if (dataSet->Open() == false) { affymetrix_calvin_exceptions::DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } nRows = dataSet->Rows(); data.ResizeB1(nRows); for (int iRow=0; iRowGetData(iRow, 0, x); dataSet->GetData(iRow, 1, y); coord.x = x; coord.y = y; data.SetB1(iRow, coord); } dataSet->Close(); dataSet->Delete(); // Next the B2 probes. dataSet = gdata.DataSet(GRD_FILE_COORDINATE_GROUP_NAME, GRD_FILE_B2_SET_NAME); if (dataSet->Open() == false) { affymetrix_calvin_exceptions::DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } nRows = dataSet->Rows(); data.ResizeB2(nRows); for (int iRow=0; iRowGetData(iRow, 0, x); dataSet->GetData(iRow, 1, y); coord.x = x; coord.y = y; data.SetB2(iRow, coord); } dataSet->Close(); dataSet->Delete(); // Last the NS probes. dataSet = gdata.DataSet(GRD_FILE_COORDINATE_GROUP_NAME, GRD_FILE_NS_SET_NAME); if (dataSet->Open() == false) { affymetrix_calvin_exceptions::DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } nRows = dataSet->Rows(); data.ResizeNS(nRows); for (int iRow=0; iRowGetData(iRow, 0, x); dataSet->GetData(iRow, 1, y); coord.x = x; coord.y = y; data.SetNS(iRow, coord); } dataSet->Close(); dataSet->Delete(); } affxparser/src/fusion/calvin_files/parsers/src/GridControlFileReader.h0000644000175200017520000000540514516003651027262 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GridControlFileReader_HEADER_ #define _GridControlFileReader_HEADER_ /*! \file GridControlFileReader.h This file provides methods to read a GRC data file. */ #include "calvin_files/data/src/DataException.h" #include "calvin_files/parsers/src/FileException.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include "file/GridControlData.h" // #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! The identifier for a template file. */ #define GRD_FILE_TYPE_IDENTIFIER "affymetrix-calvin-grid-control" /*! The name of the dataSet which contains the B1 probes. */ #define GRD_FILE_B1_SET_NAME L"B1 Probes" /*! The name of the dataSet which contains the B2 probes. */ #define GRD_FILE_B2_SET_NAME L"B2 Probes" /*! The name of the dataSet which contains the non-synthesized features. */ #define GRD_FILE_NS_SET_NAME L"Non-Synthesized Features" /*! The name of the data dataGroup which holds the probe cooridnates. */ #define GRD_FILE_COORDINATE_GROUP_NAME L"Grid Control Probes" /*! The name of the rows parameter. 
*/ #define GRD_ROWS_PARAMETER_NAME L"Rows" /*! The name of the columns parameter. */ #define GRD_COLUMNS_PARAMETER_NAME L"Columns" /*! This class reads a GRC data file. It is an interpreter class.*/ class GridControlFileReader { public: /*! Constructor */ GridControlFileReader(); /*! Destructor */ ~GridControlFileReader(); public: /*! Reads the contents of a GRC file. * * @param fileName The name of the file to read. * @param data A reference to an object that will receive information from the file. * @return True if successfully read. */ void Read(const std::string &fileName, affymetrix_grid_control::GridControlData& data); }; } #endif // _GridControlFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/ParameterFileReader.cpp0000644000175200017520000000442614516003651027311 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/ParameterFileReader.h" // #include "calvin_files/parsers/src/SAXParameterFileHandlers.h" // #include #include // #include #include // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; XERCES_CPP_NAMESPACE_USE; /* * Initialize the class. 
*/ ParameterFileReader::ParameterFileReader() { } /* * Clear the data. */ ParameterFileReader::~ParameterFileReader() { } /* * Read the entire file using the XML SAX parser. */ bool ParameterFileReader::Read(const std::string &fileName, ParameterFileData ¶meterFileData) { parameterFileData.Clear(); // Initialize the XML4C2 system try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } bool status = false; SAXParser* parser = new SAXParser; parser->setValidationScheme(SAXParser::Val_Never); parser->setLoadExternalDTD(false); parser->setDoNamespaces(false); parser->setDoSchema(false); parser->setValidationSchemaFullChecking(false); SAXParameterFileHandlers handler(¶meterFileData); parser->setDocumentHandler(&handler); parser->setErrorHandler(&handler); try { parser->parse(fileName.c_str()); int errorCount = parser->getErrorCount(); if (errorCount == 0) { status = true; } } catch (...) { status = false; } delete parser; XMLPlatformUtils::Terminate(); return status; } affxparser/src/fusion/calvin_files/parsers/src/ParameterFileReader.h0000644000175200017520000000350514516003651026753 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ParameterFileReader_HEADER_ #define _ParameterFileReader_HEADER_ /*! \file ParameterFileReader.h This file provides interfaces to read a parameter file. */ #include "calvin_files/parameter/src/ParameterFileData.h" // #include #include #include // namespace affymetrix_calvin_io { /*! This class provides interfaces to read a parameter file. This version does not store all of the contents of a parameter file. Missing items includes the ParameterSet, MetaData and Control elements. */ class ParameterFileReader { public: /*! Constructor */ ParameterFileReader(); /*! Destructor */ ~ParameterFileReader(); public: /*! Reads the entire contents of the file. * * @param fileName The name of the parameter file to read. * @param parameterData The parameter data from the input file. */ bool Read(const std::string &fileName, affymetrix_calvin_parameter::ParameterFileData ¶meterFileData); }; }; #endif // _ParameterFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/SAXArrayHandlers.cpp0000644000175200017520000003626714516003651026571 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/SAXArrayHandlers.h" // #include "calvin_files/utils/src/StringUtils.h" // #include #include #include // using namespace affymetrix_calvin_array; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; using namespace std; XERCES_CPP_NAMESPACE_USE; #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif /*! Converts a string to an integer. * @param wstr The string to convert * @return The integer representation of the string. */ static int WideToInt(wstring const &wstr) { if (wstr.length() == 0) return 0; int val = 0; swscanf(wstr.c_str(), L"%d", &val); return val; } /*! Converts a string to an integer. * @param wstr The string to convert * @return The float representation of the string. */ static float WideToFloat(wstring const &wstr) { if (wstr.length() == 0) return 0.0f; float val=0.0f; swscanf(wstr.c_str(), L"%f", &val); return val; } /*! Converts a XML character string to wide string. * @param c1 The XML string to convert * @return The wide character string */ static wstring XMLChToWString(const XMLCh* const c1) { wstring s; int i=0; while (c1[i] != 0) { s += c1[i]; ++i; } return s; } /*! Comparison operator for XML strings to wide character strings. * @param c1 The XML string * @param c2 The wide string * @return True if equal */ static bool operator==(const XMLCh* const &c1, wstring c2) { if (c2.length() != XMLString::stringLen(c1)) return false; for (int i=0; i<(int)c2.length(); i++) { if (c2[i] != c1[i]) return false; } return true; } /* * Store the array data and set the starting element to the head. 
*/ SAXArrayHandlers::SAXArrayHandlers(ArrayData *data, bool headerOnly) : arrayData(data), readHeaderOnly(headerOnly), currentElement(ARRAY_FILE) { } /* * Destructor */ SAXArrayHandlers::~SAXArrayHandlers() { } /* * No processing needed. This is only here to provide a derived function. */ void SAXArrayHandlers::startDocument() { } /* * No processing needed. This is only here to provide a derived function. */ void SAXArrayHandlers::endDocument() { } /* * Back up the current element. */ void SAXArrayHandlers::endElement(const XMLCh* const name) { MoveCurrentElementBack(name); } /* * Back up the current element. */ void SAXArrayHandlers::MoveCurrentElementBack(const XMLCh* const name) { if (name == 0) return; if (name == PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT && currentElement == PHYSICAL_ARRAY_ATTRIBUTES) currentElement = PHYSICAL_ARRAY; else if (name == PHYSICAL_ARRAY_ELEMENT) currentElement = PHYSICAL_ARRAYS; else if (name == PHYSICAL_ARRAYS_ELEMENT) currentElement = ARRAY_FILE; else if (name == USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT && currentElement == USER_ATTRIBUTES_ATTRIBUTE_CONTROL) currentElement = USER_ATTRIBUTES_ATTRIBUTE; else if (name == USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT && currentElement == USER_ATTRIBUTES_ATTRIBUTE_VALUE) currentElement = USER_ATTRIBUTES_ATTRIBUTE; else if (name == USER_ATTRIBUTES_ATTRIBUTE_ELEMENT && currentElement == USER_ATTRIBUTES_ATTRIBUTE_VALUE) currentElement = USER_ATTRIBUTES; else if (name == USER_ATTRIBUTES_ELEMENT) currentElement = ARRAY_FILE; } /* * Advance the current element. 
*/
bool SAXArrayHandlers::MoveCurrentElementForward(const XMLCh* const name)
{
    if (name == 0)
        return false;

    // Elements that select their state regardless of the enclosing element.
    if (name == ARRAY_FILE_ELEMENT)
    {
        currentElement = ARRAY_FILE;
        return true;
    }
    if (name == PHYSICAL_ARRAYS_ELEMENT)
    {
        currentElement = PHYSICAL_ARRAYS;
        return true;
    }
    if (name == PHYSICAL_ARRAY_ELEMENT)
    {
        currentElement = PHYSICAL_ARRAY;
        return true;
    }
    if (name == USER_ATTRIBUTES_ELEMENT)
    {
        currentElement = USER_ATTRIBUTES;
        return true;
    }

    // Elements that only advance the state when they appear directly inside
    // the expected parent. They are still reported as recognized elements
    // when found elsewhere; the state is simply left as it is.
    if (name == PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT)
    {
        if (currentElement == PHYSICAL_ARRAY)
            currentElement = PHYSICAL_ARRAY_ATTRIBUTES;
        return true;
    }
    if (name == USER_ATTRIBUTES_ATTRIBUTE_ELEMENT)
    {
        if (currentElement == USER_ATTRIBUTES)
            currentElement = USER_ATTRIBUTES_ATTRIBUTE;
        return true;
    }
    if (name == USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT)
    {
        if (currentElement == USER_ATTRIBUTES_ATTRIBUTE)
            currentElement = USER_ATTRIBUTES_ATTRIBUTE_VALUE;
        return true;
    }
    if (name == USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT)
    {
        if (currentElement == USER_ATTRIBUTES_ATTRIBUTE)
            currentElement = USER_ATTRIBUTES_ATTRIBUTE_CONTROL;
        return true;
    }

    // Not an element of the array file format.
    return false;
}

/*
 * Set the current element based on the name and the current element.
 * Based on the current element, store the data in the array object.
*/ void SAXArrayHandlers::startElement(const XMLCh* const name, AttributeList& attributes) { if (MoveCurrentElementForward(name) == false) return; map atts; unsigned int len = attributes.getLength(); for (unsigned int index = 0; index < len; index++) { atts[XMLChToWString(attributes.getName(index))] = XMLChToWString(attributes.getValue(index)); } switch (currentElement) { case ARRAY_FILE: StoreArrayFileAttributes(atts); break; case PHYSICAL_ARRAY: StorePhysicalArrayElementAttributes(atts); break; case PHYSICAL_ARRAY_ATTRIBUTES: StorePhysicalArrayAttribute(atts); break; case USER_ATTRIBUTES_ATTRIBUTE: StoreUserAttribute(atts); break; case USER_ATTRIBUTES_ATTRIBUTE_CONTROL: StoreUserAttributeControl(atts); break; default: break; } if (readHeaderOnly) { SAXArrayStopParsingException e; throw e; } } /* * Based on the current element, assign the value. */ void SAXArrayHandlers::characters(const XMLCh *const chars, const XMLSize_t length) { wstring str = XMLChToWString(chars); if (currentElement == PHYSICAL_ARRAY_ATTRIBUTES) { int n = (int) arrayData->PhysicalArraysAttributes().size(); ParameterNameValuePairVector &arrayAtts = arrayData->PhysicalArraysAttributes()[n-1].Attributes(); n = (int) arrayAtts.size(); arrayAtts[n-1].Value = str; } else if (currentElement == USER_ATTRIBUTES_ATTRIBUTE_VALUE) { ParameterNameValueDefaultRequiredType *param = &(*arrayData->UserAttributes().rbegin()); switch (param->ValueType()) { case ParameterNameValueDefaultRequiredType::IntegerParameterType: param->SetValueInt32(WideToInt(str)); break; case ParameterNameValueDefaultRequiredType::FloatParameterType: param->SetValueFloat(WideToFloat(str)); break; case ParameterNameValueDefaultRequiredType::TextParameterType: case ParameterNameValueDefaultRequiredType::DateParameterType: case ParameterNameValueDefaultRequiredType::TimeParameterType: case ParameterNameValueDefaultRequiredType::DateTimeParameterType: case ParameterNameValueDefaultRequiredType::ControlSingleParameterType: 
param->SetValueText(str); break; case ParameterNameValueDefaultRequiredType::ControlMultiParameterType: param->ControlMultiValues().push_back(str); break; default: break; } } } /* * Store the user attribute. */ void SAXArrayHandlers::StoreUserAttribute(map &attributes) { ParameterNameValueDefaultRequiredType param; param.SetName(attributes[USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE]); wstring value = attributes[USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_TYPE_ATTRIBUTE]; wstring defvalue = attributes[USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_DEFAULT_ATTRIBUTE]; if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::IntegerParameterType)) { param.SetValueInt32(0); param.ValueType() = ParameterNameValueDefaultRequiredType::IntegerParameterType; if (defvalue.length() > 0) param.SetDefaultValueInt32(WideToInt(defvalue)); } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::FloatParameterType)) { param.SetValueFloat(0.0f); param.ValueType() = ParameterNameValueDefaultRequiredType::FloatParameterType; if (defvalue.length() > 0) param.SetDefaultValueFloat(WideToFloat(defvalue)); } else { if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::TextParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::TextParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::DateParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::DateParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::TimeParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::TimeParameterType; } else if (value == 
ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::DateTimeParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::DateTimeParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::ControlSingleParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::ControlSingleParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::ControlMultiParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::ControlMultiParameterType; } param.SetValueText(L""); if (defvalue.length() > 0) param.SetDefaultValueText(defvalue); } value = attributes[USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_REQUIRED_ATTRIBUTE]; if (value == L"true") param.RequiredFlag() = true; else param.RequiredFlag() = false; arrayData->UserAttributes().push_back(param); } /* * Store the control value for a user attribute. */ void SAXArrayHandlers::StoreUserAttributeControl(map &attributes) { ParameterNameValueDefaultRequiredType *param = &(*arrayData->UserAttributes().rbegin()); param->ControlledVocabulary().push_back(attributes[USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT_VALUE_ATTRIBUTE]); } /* * Store the array attribute. */ void SAXArrayHandlers::StorePhysicalArrayAttribute(map &attributes) { int n = (int) arrayData->PhysicalArraysAttributes().size(); ParameterNameValuePairVector &arrayAtts = arrayData->PhysicalArraysAttributes()[n-1].Attributes(); n = (int) arrayAtts.size(); arrayAtts.resize(n+1); arrayAtts[n].Name = attributes[PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE]; arrayAtts[n].Value = attributes[PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT_VALUE_ATTRIBUTE]; } /* * Create a new entry in the physical array and store the ID. 
*/ void SAXArrayHandlers::StorePhysicalArrayElementAttributes(map &attributes) { int n = (int) arrayData->PhysicalArraysAttributes().size(); arrayData->PhysicalArraysAttributes().resize(n+1); arrayData->PhysicalArraysAttributes()[n].Identifier() = StringUtils::ConvertWCSToMBS(attributes[PHYSICAL_ARRAY_ELEMENT_ID_ATTRIBUTE]); wstring str; str = attributes[PHYSICAL_ARRAY_ELEMENT_ROW_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].MediaRow() = WideToInt(str); } str = attributes[PHYSICAL_ARRAY_ELEMENT_COL_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].MediaCol() = WideToInt(str); } str = attributes[PHYSICAL_ARRAY_ELEMENT_MEDIA_FILE_NAME_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].MediaFileName() = StringUtils::ConvertWCSToMBS(str); } str = attributes[PHYSICAL_ARRAY_ELEMENT_MEDIA_FILE_GUID_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].MediaFileGUID() = StringUtils::ConvertWCSToMBS(str); } str = attributes[PHYSICAL_ARRAY_ELEMENT_LIB_PACKAGE_NAME_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].LibraryPackageName() = StringUtils::ConvertWCSToMBS(str); } str = attributes[PHYSICAL_ARRAY_ELEMENT_NAME_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].ArrayName() = StringUtils::ConvertWCSToMBS(str); } str = attributes[PHYSICAL_ARRAY_ELEMENT_BARCODE_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].ArrayBarcode() = StringUtils::ConvertWCSToMBS(str); } str = attributes[PHYSICAL_ARRAY_ELEMENT_TYPE_ATTRIBUTE]; arrayData->PhysicalArraysAttributes()[n].Media() = MediaFromString(str); str = attributes[PHYSICAL_ARRAY_ELEMENT_MASTERFILE_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].MasterFile() = StringUtils::ConvertWCSToMBS(str);; } str = attributes[PHYSICAL_ARRAY_ELEMENT_PAT_ASSIGNMENT_ATTRIBUTE]; 
arrayData->PhysicalArraysAttributes()[n].PatAssignment() = PATAssignmentMethodFromString(str); str = attributes[PHYSICAL_ARRAY_ELEMENT_MASTERFILE_GUID_ATTRIBUTE]; if (str.length() > 0) { arrayData->PhysicalArraysAttributes()[n].MasterFileId() = StringUtils::ConvertWCSToMBS(str); } arrayData->PhysicalArraysAttributes()[n].CreationDateTime() = attributes[PHYSICAL_ARRAY_ELEMENT_CREATION_DATE_ATTRIBUTE]; arrayData->PhysicalArraysAttributes()[n].CreatedBy() = attributes[PHYSICAL_ARRAY_ELEMENT_CREATED_BY_ATTRIBUTE]; arrayData->PhysicalArraysAttributes()[n].Comment() = attributes[PHYSICAL_ARRAY_ELEMENT_COMMENT_ATTRIBUTE]; str = attributes[ARRAY_FILE_ELEMENT_CREATED_STEP_ATTRIBUTE]; arrayData->PhysicalArraysAttributes()[n].CreatedStep() = CreateStepFromString(str); } /* * Store the attributes to the array data and member variables. */ void SAXArrayHandlers::StoreArrayFileAttributes(map &attributes) { wstring str; str = attributes[ARRAY_FILE_ELEMENT_TYPE_ATTRIBUTE]; if (str.length() > 0) { arrayData->DataTypeIdentifier() = StringUtils::ConvertWCSToMBS(str); } fileVersionNumber = attributes[ARRAY_FILE_ELEMENT_VERSION_ATTRIBUTE]; arrayData->ArraySetFileIdentifier() = StringUtils::ConvertWCSToMBS(attributes[ARRAY_FILE_ELEMENT_ID_ATTRIBUTE]); arrayData->InitialProject() = attributes[ARRAY_FILE_ELEMENT_PROJECT_ATTRIBUTE]; arrayData->CreationDateTime() = attributes[ARRAY_FILE_ELEMENT_CREATE_DATE_TIME_ATTRIBUTE]; arrayData->CreatedBy() = attributes[ARRAY_FILE_ELEMENT_CREATED_BY_ATTRIBUTE]; str = attributes[ARRAY_FILE_ELEMENT_CREATED_STEP_ATTRIBUTE]; arrayData->CreatedStep() = CreateStepFromString(str); } affxparser/src/fusion/calvin_files/parsers/src/SAXArrayHandlers.h0000644000175200017520000002564314516003651026232 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _SAXArrayHandlers_HEADER_ #define _SAXArrayHandlers_HEADER_ /*! \file SAXArrayHandlers.h This file provides SAX parsing handlers for array files. */ #include "calvin_files/array/src/ArrayData.h" // #include #include #include // #include #include #include // namespace affymetrix_calvin_io { /*! The name of the DTD for the array file. */ #define ARRAY_FILE_DTD std::wstring(L"ArraySetAndTemplateFile.dtd") /*! The encoding to use for array files. */ #define ARRAY_FILE_ENCODING std::wstring(L"UTF-16") /*! The name of the element that contains the array file id, type and version data. */ #define ARRAY_FILE_ELEMENT std::wstring(L"ArraySetFile") /*! The attribute name of the ID field for the array file. */ #define ARRAY_FILE_ELEMENT_ID_ATTRIBUTE std::wstring(L"GUID") /*! The attribute name of the type field for the array file. */ #define ARRAY_FILE_ELEMENT_TYPE_ATTRIBUTE std::wstring(L"Type") /*! The attribute name of the version field for the array file. */ #define ARRAY_FILE_ELEMENT_VERSION_ATTRIBUTE std::wstring(L"Version") /*! The attribute name of the original project field for the array file. */ #define ARRAY_FILE_ELEMENT_PROJECT_ATTRIBUTE std::wstring(L"OriginalProjectName") /*!
The attribute name of the creation date field for the array file. */ #define ARRAY_FILE_ELEMENT_CREATE_DATE_TIME_ATTRIBUTE std::wstring(L"CreatedDateTime") /*! The attribute name of the created-by field for the array file. */ #define ARRAY_FILE_ELEMENT_CREATED_BY_ATTRIBUTE std::wstring(L"CreatedBy") /*! The attribute name of the created-step field for the array file. */ #define ARRAY_FILE_ELEMENT_CREATED_STEP_ATTRIBUTE std::wstring(L"CreatedStep") /*! The name of the element that contains the list of physical arrays. */ #define PHYSICAL_ARRAYS_ELEMENT std::wstring(L"PhysicalArrays") /*! The name of the element that contains the attributes of a single physical array. */ #define PHYSICAL_ARRAY_ELEMENT std::wstring(L"PhysicalArray") /*! The attribute name of the array type field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_ARRAY_TYPE_ATTRIBUTE std::wstring(L"Type") /*! The attribute name of the ID field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_ID_ATTRIBUTE std::wstring(L"GUID") /*! The attribute name of the array name field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_NAME_ATTRIBUTE std::wstring(L"ArrayName") /*! The attribute name of the barcode field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_BARCODE_ATTRIBUTE std::wstring(L"AffyBarcode") /*! The attribute name of the media type field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_TYPE_ATTRIBUTE std::wstring(L"MediaType") /*! The attribute name of the row field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_ROW_ATTRIBUTE std::wstring(L"MediaRow") /*! The attribute name of the col field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_COL_ATTRIBUTE std::wstring(L"MediaCol") /*! The attribute name of the media file name. */ #define PHYSICAL_ARRAY_ELEMENT_MEDIA_FILE_NAME_ATTRIBUTE std::wstring(L"MediaFileName") /*! The attribute name of the media file guid. */ #define PHYSICAL_ARRAY_ELEMENT_MEDIA_FILE_GUID_ATTRIBUTE std::wstring(L"MediaFileGUID") /*!
The attribute name of the library file package name. */ #define PHYSICAL_ARRAY_ELEMENT_LIB_PACKAGE_NAME_ATTRIBUTE std::wstring(L"LibraryPackageName") /*! The attribute name of the master file field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_MASTERFILE_ATTRIBUTE std::wstring(L"MasterFileName") /*! The attribute name of the master file guid field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_MASTERFILE_GUID_ATTRIBUTE std::wstring(L"MasterFileGUID") /*! The attribute name of the pat assignment field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_PAT_ASSIGNMENT_ATTRIBUTE std::wstring(L"PATAssignmentMethod") /*! The attribute name of the creation date field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_CREATION_DATE_ATTRIBUTE std::wstring(L"CreatedDateTime") /*! The attribute name of the creation user field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_CREATED_BY_ATTRIBUTE std::wstring(L"CreatedBy") /*! The attribute name of the comment field for a physical array. */ #define PHYSICAL_ARRAY_ELEMENT_COMMENT_ATTRIBUTE std::wstring(L"Comment") /*! The name of the element that contains a single attribute of a single physical array. */ #define PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT std::wstring(L"ArrayAttribute") /*! The attribute name of the name field for a physical array attribute. */ #define PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE std::wstring(L"Name") /*! The attribute name of the value field for a physical array attribute. */ #define PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT_VALUE_ATTRIBUTE std::wstring(L"Value") /*! The name of the element that contains the user attributes. */ #define USER_ATTRIBUTES_ELEMENT std::wstring(L"UserAttributes") /*! The name of the element that contains a single user attribute. */ #define USER_ATTRIBUTES_ATTRIBUTE_ELEMENT std::wstring(L"UserAttribute") /*! The name of the element that contains the value of a single user attribute.
*/ #define USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT std::wstring(L"UserAttributeValue") /*! The attribute name of the name field for a user attribute. */ #define USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE std::wstring(L"Name") /*! The attribute name of the type field for a user attribute. */ #define USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_TYPE_ATTRIBUTE std::wstring(L"Type") /*! The attribute name of the default value field for a user attribute. */ #define USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_DEFAULT_ATTRIBUTE std::wstring(L"DefaultValue") /*! The attribute name of the required field for a user attribute. */ #define USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_REQUIRED_ATTRIBUTE std::wstring(L"Required") /*! The attribute name of the value field for a user attribute. */ #define USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_VALUE_ATTRIBUTE std::wstring(L"Value") /*! The name of the element that contains a controlled vocabulary. */ #define USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT std::wstring(L"Control") /*! The attribute name of the value field for a controlled vocabulary. */ #define USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT_VALUE_ATTRIBUTE std::wstring(L"Value") /*! Enumerants to hold the elements in an array file. */ typedef enum { ARRAY_FILE, PHYSICAL_ARRAYS, PHYSICAL_ARRAY, PHYSICAL_ARRAY_ATTRIBUTES, USER_ATTRIBUTES, USER_ATTRIBUTES_ATTRIBUTE, USER_ATTRIBUTES_ATTRIBUTE_VALUE, USER_ATTRIBUTES_ATTRIBUTE_CONTROL } ArrayFileElements; /*! An exception for stopping the parsing of a file. */ class SAXArrayStopParsingException : public XERCES_CPP_NAMESPACE::SAXException { }; /*! This class provides the SAX handlers for reading calvin array files. */ class SAXArrayHandlers : public XERCES_CPP_NAMESPACE::HandlerBase { private: /*! A pointer to the array object. */ affymetrix_calvin_array::ArrayData *arrayData; /*! A flag used to indicate that the header line should only be read. */ bool readHeaderOnly; /*! The parent element that is currently being processed. */ ArrayFileElements currentElement; /*!
The file's version number. */ std::wstring fileVersionNumber; /*! An identifier to the type of data stored in the file */ affymetrix_calvin_utilities::AffymetrixGuidType dataTypeIdentifier; /*! Stores the attributes from the array file element. * * @param attributes The name/value attributes. */ void StoreArrayFileAttributes(std::map &attributes); /*! Stores the attributes from the physical array element. * * @param attributes The name/value attributes. */ void StorePhysicalArrayElementAttributes(std::map &attributes); /*! Stores an attribute from the physical array attribute element. * * @param attributes The name/value attributes. */ void StorePhysicalArrayAttribute(std::map &attributes); /*! Stores an attribute from the user attribute element. * * @param attributes The name/value attributes. */ void StoreUserAttribute(std::map &attributes); /*! Stores a control value from the user attribute element. * * @param attributes The name/value attributes. */ void StoreUserAttributeControl(std::map &attributes); /*! Modifies the state machine given the end of an element. * * @param name The name of the element. */ void MoveCurrentElementBack(const XMLCh* const name); /*! Modifies the state machine given the start of an element. * * @param name The name of the element. * @return True if a valid element was found. */ bool MoveCurrentElementForward(const XMLCh* const name); public: /*! Constructor * * @param data The array data. * @param headerOnly Flag to indicate that the header line should only be read. */ SAXArrayHandlers(affymetrix_calvin_array::ArrayData *data, bool headerOnly=false); /*! Destructor */ ~SAXArrayHandlers(); /*! Called at the start of the document */ void startDocument(); /*! Called at the end of the document */ void endDocument(); /*! Receive notification of character data inside an element. * @param chars The character data. * @param length The length of the character string. */ void characters(const XMLCh *const chars, const XMLSize_t length); /*!
Called at the start of each element. * * @param name The name of the element. * @param attributes The attributes of the element. */ void startElement(const XMLCh* const name, XERCES_CPP_NAMESPACE::AttributeList& attributes); /*! Called at the end of each element. * * @param name The name of the element. */ void endElement(const XMLCh* const name); /*! The identifier of the type of data stored in the file. * * @return The identifier of the type of data. */ const affymetrix_calvin_utilities::AffymetrixGuidType &DataTypeIdentifier() const { return dataTypeIdentifier; } /*! The file's version number. * * @return The file version. */ std::wstring FileVersionNumber() const { return fileVersionNumber; } }; }; #endif // _SAXArrayHandlers_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/SAXParameterFileHandlers.cpp0000644000175200017520000001437614516003651030230 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/SAXParameterFileHandlers.h" // #include "calvin_files/utils/src/StringUtils.h" // #include #include #include // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; using namespace std; XERCES_CPP_NAMESPACE_USE; #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif /*! Converts a XML character string to wide string. * @param c1 The XML string to convert * @return The wide character string */ static wstring XMLChToWString(const XMLCh* const c1) { wstring s; int i=0; while (c1[i] != 0) { s += c1[i]; ++i; } return s; } /*! Comparison operator for XML strings to wide character strings. * @param c1 The XML string * @param c2 The wide string * @return True if equal */ static bool operator==(const XMLCh* const &c1, wstring c2) { if (c2.length() != XMLString::stringLen(c1)) return false; for (int i=0; i<(int)c2.length(); i++) { if (c2[i] != c1[i]) return false; } return true; } /* * Store the data and set the starting element to the head. */ SAXParameterFileHandlers::SAXParameterFileHandlers(ParameterFileData *data) : parameterFileData(data), currentElement(PARAMETER_FILE) { } /* * Destructor */ SAXParameterFileHandlers::~SAXParameterFileHandlers() { } /* * No processing needed. This is only here to provide a derived function. */ void SAXParameterFileHandlers::startDocument() { } /* * No processing needed. This is only here to provide a derived function. */ void SAXParameterFileHandlers::endDocument() { } /* * Back up the current element. 
*/ void SAXParameterFileHandlers::endElement(const XMLCh* const name) { MoveCurrentElementBack(name); } /* * Back up the current element. */ void SAXParameterFileHandlers::MoveCurrentElementBack(const XMLCh* const name) { if (name == 0) return; if (name == PARAMETER_ELEMENT) currentElement = PARAMETER_FILE; else if (name == IMPLEMENTATION_ELEMENT) currentElement = PARAMETER_FILE; } /* * Advance the current element. */ bool SAXParameterFileHandlers::MoveCurrentElementForward(const XMLCh* const name) { if (name == 0) return false; if (name == PARAMETER_FILE_ELEMENT) currentElement = PARAMETER_FILE; else if (name == IMPLEMENTATION_ELEMENT) currentElement = IMPLEMENTATION; else if (name == PARAMETER_ELEMENT) currentElement = PARAMETER; else return false; return true; } /* * Set the current element based on the name and the current element. * Based on the current element, store the data in the array object. */ void SAXParameterFileHandlers::startElement(const XMLCh* const name, AttributeList& attributes) { if (MoveCurrentElementForward(name) == false) return; map atts; unsigned int len = attributes.getLength(); for (unsigned int index = 0; index < len; index++) { atts[XMLChToWString(attributes.getName(index))] = XMLChToWString(attributes.getValue(index)); } switch (currentElement) { case PARAMETER_FILE: StoreParameterFileAttributes(atts); break; case IMPLEMENTATION: StoreImplementationAttributes(atts); break; case PARAMETER: StoreParameterAttributes(atts); break; default: break; } } /* * Based on the current element, assign the value. */ void SAXParameterFileHandlers::characters(const XMLCh *const chars, const unsigned int length) { } /* * Store the parameter attributes. 
*/ void SAXParameterFileHandlers::StoreParameterAttributes(map &attributes) { ParameterType param; param.name = attributes[PARAMETER_NAME_ATTRIBUTE]; param.index = attributes[PARAMETER_INDEX_ATTRIBUTE]; param.displayName = attributes[PARAMETER_DISPLAY_NAME_ATTRIBUTE]; param.category = attributes[PARAMETER_CATEGORY_ATTRIBUTE]; param.isEditable = attributes[PARAMETER_ISEDITABLE_ATTRIBUTE]; param.type = attributes[PARAMETER_TYPE_ATTRIBUTE]; param.currentValue = attributes[PARAMETER_CURVALUE_ATTRIBUTE]; param.minValue = attributes[PARAMETER_MINVALUE_ATTRIBUTE]; param.maxValue = attributes[PARAMETER_MAXVALUE_ATTRIBUTE]; param.defaultValue = attributes[PARAMETER_DEFVALUE_ATTRIBUTE]; param.precision = attributes[PARAMETER_PRECISION_ATTRIBUTE]; param.maxLength = attributes[PARAMETER_MAXLENGTH_ATTRIBUTE]; param.description = attributes[PARAMETER_DESC_ATTRIBUTE]; parameterFileData->Parameters().push_back(param); } /* * Store the implementation attributes. */ void SAXParameterFileHandlers::StoreImplementationAttributes(map &attributes) { parameterFileData->ImplementationAttributes().name = attributes[IMPLEMENTATION_NAME_ATTRIBUTE]; parameterFileData->ImplementationAttributes().version = attributes[IMPLEMENTATION_VERSION_ATTRIBUTE]; parameterFileData->ImplementationAttributes().executableFileName = attributes[IMPLEMENTATION_EXE_FILE_NAME_ATTRIBUTE]; parameterFileData->ImplementationAttributes().description = attributes[IMPLEMENTATION_DESC_ATTRIBUTE]; } /* * Store the parameter file attributes */ void SAXParameterFileHandlers::StoreParameterFileAttributes(map &attributes) { parameterFileData->ParameterFileAttributes().company = attributes[PARAMETER_FILE_COMPANY_ATTRIBUTE]; parameterFileData->ParameterFileAttributes().userName = attributes[PARAMETER_FILE_USER_NAME_ATTRIBUTE]; parameterFileData->ParameterFileAttributes().contentVersion = attributes[PARAMETER_FILE_CONTENT_VERSION_ATTRIBUTE]; } 
affxparser/src/fusion/calvin_files/parsers/src/SAXParameterFileHandlers.h0000644000175200017520000001776714516003651027704 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _SAXParameterFileHandlers_HEADER_ #define _SAXParameterFileHandlers_HEADER_ /*! \file SAXParameterFileHandlers.h This file provides SAX parsing handles for parameter files. */ #include "calvin_files/parameter/src/ParameterFileData.h" // #include #include #include // #include #include #include // namespace affymetrix_calvin_io { /*! The name of the element that contains the parameter file attributes. */ #define PARAMETER_FILE_ELEMENT std::wstring(L"ParameterFile") /*! The attribute name of the company name field. */ #define PARAMETER_FILE_COMPANY_ATTRIBUTE std::wstring(L"company") /*! The attribute name of the user name field. */ #define PARAMETER_FILE_USER_NAME_ATTRIBUTE std::wstring(L"userName") /*! The attribute name of the content version field. */ #define PARAMETER_FILE_CONTENT_VERSION_ATTRIBUTE std::wstring(L"contentVersion") /*! The element that contains the implementation attributes. */ #define IMPLEMENTATION_ELEMENT std::wstring(L"Implementation") /*! The name attribute. 
*/
#define IMPLEMENTATION_NAME_ATTRIBUTE std::wstring(L"name")

/*! The version attribute of the Implementation element. */
#define IMPLEMENTATION_VERSION_ATTRIBUTE std::wstring(L"version")

/*! The executableFileName attribute of the Implementation element. */
#define IMPLEMENTATION_EXE_FILE_NAME_ATTRIBUTE std::wstring(L"executableFileName")

/*! The description attribute of the Implementation element. */
#define IMPLEMENTATION_DESC_ATTRIBUTE std::wstring(L"description")

/*! The ParameterSet element name. */
#define PARAMETER_SET_ELEMENT std::wstring(L"ParameterSet")

/*! The Parameter element name. */
#define PARAMETER_ELEMENT std::wstring(L"Parameter")

/*! The name attribute of the Parameter element. */
#define PARAMETER_NAME_ATTRIBUTE std::wstring(L"name")

/*! The index attribute of the Parameter element. */
#define PARAMETER_INDEX_ATTRIBUTE std::wstring(L"index")

/*! The display name attribute of the Parameter element. */
#define PARAMETER_DISPLAY_NAME_ATTRIBUTE std::wstring(L"displayName")

/*! The category attribute of the Parameter element. */
#define PARAMETER_CATEGORY_ATTRIBUTE std::wstring(L"category")

/*! The isEditable attribute of the Parameter element. */
#define PARAMETER_ISEDITABLE_ATTRIBUTE std::wstring(L"isEditable")

/*! The type attribute of the Parameter element. The PARAMETER_*_TYPE
 *  macros that follow are the type names defined by this header. */
#define PARAMETER_TYPE_ATTRIBUTE std::wstring(L"type")

/*! String8 type. */
#define PARAMETER_STRING8_TYPE std::wstring(L"String8")

/*! String16 type. */
#define PARAMETER_STRING16_TYPE std::wstring(L"String16")

/*! Int8 type. */
#define PARAMETER_INT8_TYPE std::wstring(L"Int8")

/*! UInt8 type. */
#define PARAMETER_UINT8_TYPE std::wstring(L"UInt8")

/*! Int16 type. */
#define PARAMETER_INT16_TYPE std::wstring(L"Int16")

/*! UInt16 type. */
#define PARAMETER_UINT16_TYPE std::wstring(L"UInt16")

/*! Int32 type. */
#define PARAMETER_INT32_TYPE std::wstring(L"Int32")

/*! UInt32 type. */
#define PARAMETER_UINT32_TYPE std::wstring(L"UInt32")

/*! Float type. */
#define PARAMETER_FLOAT_TYPE std::wstring(L"Float")

/*! SingleControl type. */
#define PARAMETER_SINGLE_CONTROL_TYPE std::wstring(L"SingleControl")

/*! Boolean type. */
#define PARAMETER_BOOLEAN_TYPE std::wstring(L"Boolean")

/*! Date type.
*/
#define PARAMETER_DATE_TYPE std::wstring(L"Date")

/*! Time type. */
#define PARAMETER_TIME_TYPE std::wstring(L"Time")

/*! DateTime type. */
#define PARAMETER_DATETIME_TYPE std::wstring(L"DateTime")

/*! ExternalFile type. */
#define PARAMETER_EXT_FILE_TYPE std::wstring(L"ExternalFile")

/*! Unknown type. */
#define PARAMETER_UNKNOWN_TYPE std::wstring(L"Unknown")

/*! The currentValue attribute of the Parameter element. */
#define PARAMETER_CURVALUE_ATTRIBUTE std::wstring(L"currentValue")

/*! The minValue attribute of the Parameter element. */
#define PARAMETER_MINVALUE_ATTRIBUTE std::wstring(L"minValue")

/*! The maxValue attribute of the Parameter element. */
#define PARAMETER_MAXVALUE_ATTRIBUTE std::wstring(L"maxValue")

/*! The defaultValue attribute of the Parameter element. */
#define PARAMETER_DEFVALUE_ATTRIBUTE std::wstring(L"defaultValue")

/*! The precision attribute of the Parameter element. */
#define PARAMETER_PRECISION_ATTRIBUTE std::wstring(L"precision")

/*! The maxLength attribute of the Parameter element. */
#define PARAMETER_MAXLENGTH_ATTRIBUTE std::wstring(L"maxLength")

/*! The description attribute of the Parameter element. */
#define PARAMETER_DESC_ATTRIBUTE std::wstring(L"description")

/*! The MetaData element. */
#define META_DATA_ELEMENT std::wstring(L"MetaData")

/*! The lastChangedBy attribute. */
#define META_DATA_LAST_CHANGED_BY_ATTRIBUTE std::wstring(L"lastChangedBy")

/*! The lastChangedOn attribute */
#define META_DATA_LAST_CHANGED_ON_ATTRIBUTE std::wstring(L"lastChangedOn")

/*! The isReadOnly attribute. */
#define META_DATA_IS_READONLY_ATTRIBUTE std::wstring(L"isReadOnly")

/*! The Control element. */
#define CONTROL_ELEMENT std::wstring(L"Control")

/*! Enumerants to hold the elements in a parameter file.
 *
 *  These are the states of the SAX handler's element state machine. The
 *  handler implementation only ever assigns PARAMETER_FILE, IMPLEMENTATION
 *  and PARAMETER to its current element.
 */
typedef enum {
	PARAMETER_FILE,
	IMPLEMENTATION,
	PARAMETER_SET,
	PARAMETER_SETS,
	PARAMETERS,
	PARAMETER,
	META_DATA,
	CONTROL
} ParameterFileElements;

/*! This class provides the SAX handlers for reading calvin parameter files. */
class SAXParameterFileHandlers : public XERCES_CPP_NAMESPACE::HandlerBase
{
private:
	/*! A pointer to the parameter file data object.
*/ affymetrix_calvin_parameter::ParameterFileData *parameterFileData; /*! The parent element that is currently being processed. */ ParameterFileElements currentElement; /*! Stores the attributes from the parameter file element. * * @param attributes The name/value attributes. */ void StoreParameterFileAttributes(std::map &attributes); /*! Stores the attribites from the implementation element. * * @param attributes The name/value attributes. */ void StoreImplementationAttributes(std::map &attributes); /*! Stores the attributes from the parameter attribute element. * * @param attributes The name/value attributes. */ void StoreParameterAttributes(std::map &attributes); /*! Modifies the state machine given the end of an element. * * @param name The name of the element. */ void MoveCurrentElementBack(const XMLCh* const name); /*! Modifies the state machine given the start of an element. * * @param name The name of the element. * @return True is a valid element was found. */ bool MoveCurrentElementForward(const XMLCh* const name); public: /*! Constructor * * @param data The parameter file data. */ SAXParameterFileHandlers(affymetrix_calvin_parameter::ParameterFileData *data); /*! Destructor */ ~SAXParameterFileHandlers(); /*! Called at the start of the document */ void startDocument(); /*! Called at the end of the document */ void endDocument(); /*! Receive notification of character data inside an element. * @param chars The character data. * @param length The length of the character string. */ void characters(const XMLCh *const chars, const unsigned int length); /*! Called at the start of each element. * * @param name The name of the element. * @param attributes The attributes of the element. */ void startElement(const XMLCh* const name, XERCES_CPP_NAMESPACE::AttributeList& attributes); /*! Called at the end of each element. * * @param name The name of the element. 
*/ void endElement(const XMLCh* const name); }; }; #endif // _SAXParameterFileHandlers_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/SAXTemplateHandlers.cpp0000644000175200017520000002574214516003651027262 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/SAXTemplateHandlers.h" // #include "calvin_files/utils/src/StringUtils.h" // #include #include #include // using namespace affymetrix_calvin_template; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; using namespace std; XERCES_CPP_NAMESPACE_USE; #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif /*! Converts a string to an integer. * @param wstr The string to convert * @return The integer representation of the string. */ static int WideToInt(wstring const &wstr) { if (wstr.length() == 0) return 0; int val = 0; swscanf(wstr.c_str(), L"%d", &val); return val; } /*! Converts a string to an integer. * @param wstr The string to convert * @return The float representation of the string. 
*/ static float WideToFloat(wstring const &wstr) { if (wstr.length() == 0) return 0.0f; float val=0.0f; swscanf(wstr.c_str(), L"%f", &val); return val; } /*! Converts a XML character string to wide string. * @param c1 The XML string to convert * @return The wide character string */ static wstring XMLChToWString(const XMLCh* const c1) { wstring s; int i=0; while (c1[i] != 0) { s += c1[i]; ++i; } return s; } /*! Comparison operator for XML strings to wide character strings. * @param c1 The XML string * @param c2 The wide string * @return True if equal */ static bool operator==(const XMLCh* const &c1, wstring c2) { if (c2.length() != XMLString::stringLen(c1)) return false; for (int i=0; i<(int)c2.length(); i++) { if (c2[i] != c1[i]) return false; } return true; } /* * Store the template data and set the starting element to the head. */ SAXTemplateHandlers::SAXTemplateHandlers(TemplateData *data, bool headerOnly) : templateData(data), readHeaderOnly(headerOnly), currentElement(TEMPLATE_FILE) { } /* * Destructor */ SAXTemplateHandlers::~SAXTemplateHandlers() { } /* * No processing needed. This is only here to provide a derived function. */ void SAXTemplateHandlers::startDocument() { } /* * No processing needed. This is only here to provide a derived function. */ void SAXTemplateHandlers::endDocument() { } /* * Back up the current element. */ void SAXTemplateHandlers::endElement(const XMLCh* const name) { MoveCurrentElementBack(name); } /* * Back up the current element. 
*/ void SAXTemplateHandlers::MoveCurrentElementBack(const XMLCh* const name) { if (name == 0) return; if (name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT && currentElement == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL) currentElement = TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE; else if (name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT && currentElement == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE) currentElement = TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE; else if (name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT && currentElement == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE) currentElement = TEMPLATE_USER_ATTRIBUTES; else if (name == TEMPLATE_USER_ATTRIBUTES_ELEMENT) currentElement = TEMPLATE_FILE; } /* * Advance the current element. */ bool SAXTemplateHandlers::MoveCurrentElementForward(const XMLCh* const name) { if (name == 0) return false; if (name == TEMPLATE_FILE_ELEMENT) currentElement = TEMPLATE_FILE; else if (name == TEMPLATE_USER_ATTRIBUTES_ELEMENT) currentElement = TEMPLATE_USER_ATTRIBUTES; else if (name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT && currentElement == TEMPLATE_USER_ATTRIBUTES) currentElement = TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE; else if (name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT && currentElement == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE) currentElement = TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE; else if (name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT && currentElement == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE) currentElement = TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL; else if (name == TEMPLATE_FILE_ELEMENT || name == TEMPLATE_USER_ATTRIBUTES_ELEMENT || name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT || name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT || name == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT) { } else return false; return true; } /* * Set the current element based on the name and the current element. * Based on the current element, store the data in the template object. 
*/ void SAXTemplateHandlers::startElement(const XMLCh* const name, AttributeList& attributes) { if (MoveCurrentElementForward(name) == false) return; map atts; unsigned int len = attributes.getLength(); for (unsigned int index = 0; index < len; index++) { atts[XMLChToWString(attributes.getName(index))] = XMLChToWString(attributes.getValue(index)); } switch (currentElement) { case TEMPLATE_FILE: StoreTemplateFileAttributes(atts); break; case TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE: StoreUserAttribute(atts); break; case TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL: StoreUserAttributeControl(atts); break; default: break; } if (readHeaderOnly) { SAXTemplateStopParsingException e; throw e; } } /* * Based on the current element, assign the value. */ void SAXTemplateHandlers::characters(const XMLCh *const chars, const XMLSize_t length) { wstring str = XMLChToWString(chars); if (currentElement == TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE) { ParameterNameValueDefaultRequiredType *param = &(*templateData->UserAttributes().rbegin()); switch (param->ValueType()) { case ParameterNameValueDefaultRequiredType::IntegerParameterType: param->SetValueInt32(WideToInt(str)); break; case ParameterNameValueDefaultRequiredType::FloatParameterType: param->SetValueFloat(WideToFloat(str)); break; case ParameterNameValueDefaultRequiredType::TextParameterType: case ParameterNameValueDefaultRequiredType::DateParameterType: case ParameterNameValueDefaultRequiredType::TimeParameterType: case ParameterNameValueDefaultRequiredType::DateTimeParameterType: case ParameterNameValueDefaultRequiredType::ControlSingleParameterType: param->SetValueText(str); break; case ParameterNameValueDefaultRequiredType::ControlMultiParameterType: param->ControlMultiValues().push_back(str); break; default: break; } } } /* * Store the user attribute. 
*/ void SAXTemplateHandlers::StoreUserAttribute(map &attributes) { ParameterNameValueDefaultRequiredType param; param.SetName(attributes[TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE]); wstring value = attributes[TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_TYPE_ATTRIBUTE]; wstring defvalue = attributes[TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_DEFAULT_ATTRIBUTE]; if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::IntegerParameterType)) { param.SetValueInt32(0); param.ValueType() = ParameterNameValueDefaultRequiredType::IntegerParameterType; if (defvalue.length() > 0) param.SetDefaultValueInt32(WideToInt(defvalue)); } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::FloatParameterType)) { param.SetValueFloat(0.0f); param.ValueType() = ParameterNameValueDefaultRequiredType::FloatParameterType; if (defvalue.length() > 0) param.SetDefaultValueFloat(WideToFloat(defvalue)); } else { if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::TextParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::TextParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::DateParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::DateParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::TimeParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::TimeParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::DateTimeParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::DateTimeParameterType; } else if (value == 
ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::ControlSingleParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::ControlSingleParameterType; } else if (value == ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(ParameterNameValueDefaultRequiredType::ControlMultiParameterType)) { param.ValueType() = ParameterNameValueDefaultRequiredType::ControlMultiParameterType; } param.SetValueText(L""); if (defvalue.length() > 0) param.SetDefaultValueText(defvalue); } value = attributes[TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_REQUIRED_ATTRIBUTE]; if (value == L"Yes") param.RequiredFlag() = true; else param.RequiredFlag() = false; templateData->UserAttributes().push_back(param); } /* * Store the control value for a user attribute. */ void SAXTemplateHandlers::StoreUserAttributeControl(map &attributes) { ParameterNameValueDefaultRequiredType *param = &(*templateData->UserAttributes().rbegin()); param->ControlledVocabulary().push_back(attributes[TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT_VALUE_ATTRIBUTE]); } /* * Store the attributes to the template data and member variables. 
*/ void SAXTemplateHandlers::StoreTemplateFileAttributes(map &attributes) { wstring str; str = attributes[TEMPLATE_FILE_ELEMENT_TYPE_ATTRIBUTE]; if (str.length() > 0) { templateData->DataTypeIdentifier() = StringUtils::ConvertWCSToMBS(str); } fileVersionNumber = attributes[TEMPLATE_FILE_ELEMENT_VERSION_ATTRIBUTE]; templateData->TemplateFileIdentifier() = StringUtils::ConvertWCSToMBS(attributes[TEMPLATE_FILE_ELEMENT_ID_ATTRIBUTE]); templateData->CreationDateTime() = attributes[TEMPLATE_FILE_ELEMENT_CREATE_DATE_TIME_ATTRIBUTE]; templateData->CreatedBy() = attributes[TEMPLATE_FILE_ELEMENT_CREATED_BY_ATTRIBUTE]; } affxparser/src/fusion/calvin_files/parsers/src/SAXTemplateHandlers.h0000644000175200017520000001647114516003651026726 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _SAXTemplateHandlers_HEADER_ #define _SAXTemplateHandlers_HEADER_ /*! \file SAXTemplateHandlers.h This file provides SAX parsing handles for template files. */ #include "calvin_files/template/src/TemplateData.h" // #include #include #include // #include #include #include // namespace affymetrix_calvin_io { /*! The name of the DTD for the template file. 
*/
#define TEMPLATE_FILE_DTD std::wstring(L"ArraySetAndTemplateFile.dtd")

/*! The encoding to use for template files. */
#define TEMPLATE_FILE_ENCODING std::wstring(L"UTF-16")

/*! The name of the element that contains the template file id, type and version data. */
#define TEMPLATE_FILE_ELEMENT std::wstring(L"TemplateFile")

/*! The attribute name of the ID field for the template file. */
#define TEMPLATE_FILE_ELEMENT_ID_ATTRIBUTE std::wstring(L"GUID")

/*! The attribute name of the type field for the template file. */
#define TEMPLATE_FILE_ELEMENT_TYPE_ATTRIBUTE std::wstring(L"Type")

/*! The attribute name of the version field for the template file. */
#define TEMPLATE_FILE_ELEMENT_VERSION_ATTRIBUTE std::wstring(L"Version")

/*! The attribute name of the creation date field for the template file. */
#define TEMPLATE_FILE_ELEMENT_CREATE_DATE_TIME_ATTRIBUTE std::wstring(L"CreatedDateTime")

/*! The attribute name of the created by field for the template file. */
#define TEMPLATE_FILE_ELEMENT_CREATED_BY_ATTRIBUTE std::wstring(L"CreatedBy")

/*! The name of the element that contains the user attributes. */
#define TEMPLATE_USER_ATTRIBUTES_ELEMENT std::wstring(L"UserAttributes")

/*! The name of the element that contains a single user attribute. */
#define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT std::wstring(L"UserAttribute")

/*! The name of the element that contains the value of a single user attribute. */
#define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT std::wstring(L"UserAttributeValue")

/*! The attribute name of the name field for a user attribute. */
#define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE std::wstring(L"Name")

/*! The attribute name of the type field for a user attribute. */
#define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_TYPE_ATTRIBUTE std::wstring(L"Type")

/*! The attribute name of the default value field for a user attribute. */
#define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_DEFAULT_ATTRIBUTE std::wstring(L"DefaultValue")

/*!
The attribute name of the required field for a user attribute. */ #define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_REQUIRED_ATTRIBUTE std::wstring(L"Required") /*! The attribute name of the value field for a user attribute. */ #define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_VALUE_ATTRIBUTE std::wstring(L"Value") /*! The name of the element that contains a controlled vocabulary. */ #define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT std::wstring(L"Control") /*! The attribute name of the value field for a controlled vocabulary. */ #define TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT_VALUE_ATTRIBUTE std::wstring(L"Value") /*! Enumerants to hold the elements in an template file. */ typedef enum { TEMPLATE_FILE, TEMPLATE_USER_ATTRIBUTES, TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE, TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE, TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL } TemplateFileElements; /*! An exception for stoping parsing of a file. */ class SAXTemplateStopParsingException : public XERCES_CPP_NAMESPACE::SAXException { }; /*! This class provides the SAX handlers for reading calvin template files. */ class SAXTemplateHandlers : public XERCES_CPP_NAMESPACE::HandlerBase { private: /*! A pointer to the template object. */ affymetrix_calvin_template::TemplateData *templateData; /*! A flag used to indicate that the header line should only be read. */ bool readHeaderOnly; /*! The parent element that is currently being processed. */ TemplateFileElements currentElement; /*! The files version number. */ std::wstring fileVersionNumber; /*! An identifier to the type of data stored in the file */ affymetrix_calvin_utilities::AffymetrixGuidType dataTypeIdentifier; /*! Stores the attributes from the template file element. * * @param attributes The name/value attributes. */ void StoreTemplateFileAttributes(std::map &attributes); /*! Stores an attribute from the user attribute element. * * @param attributes The name/value attributes. 
*/ void StoreUserAttribute(std::map &attributes); /*! Stores a control value from the user attribute element. * * @param attributes The name/value attributes. */ void StoreUserAttributeControl(std::map &attributes); /*! Modifies the state machine given the end of an element. * * @param name The name of the element. */ void MoveCurrentElementBack(const XMLCh* const name); /*! Modifies the state machine given the start of an element. * * @param name The name of the element. * @return True is a valid element was found. */ bool MoveCurrentElementForward(const XMLCh* const name); public: /*! Constructor * * @param data The template data. * @param headerOnly Flag to indicate that the header line should only be read. */ SAXTemplateHandlers(affymetrix_calvin_template::TemplateData *data, bool headerOnly=false); /*! Destructor */ ~SAXTemplateHandlers(); /*! Called at the start of the document */ void startDocument(); /*! Called at the end of the document */ void endDocument(); /*! Receive notification of character data inside an element. * @param chars The character data. * @param length The length of the character string. */ void characters(const XMLCh *const chars, const XMLSize_t length); /*! Called at the start of each element. * * @param name The name of the element. * @param attributes The attributes of the element. */ void startElement(const XMLCh* const name, XERCES_CPP_NAMESPACE::AttributeList& attributes); /*! Called at the end of each element. * * @param name The name of the element. */ void endElement(const XMLCh* const name); /*! The identifier of the type of data stored in the file. * * @return The identifier of the type of data. */ const affymetrix_calvin_utilities::AffymetrixGuidType &DataTypeIdentifier() const { return dataTypeIdentifier; } /*! The files version number. * * @return The file version. 
*/ std::wstring FileVersionNumber() const { return fileVersionNumber; } }; }; #endif // _SAXTemplateHandlers_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/TemplateFileReader.cpp0000644000175200017520000000643714516003651027150 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/parsers/src/TemplateFileReader.h" // #include "calvin_files/parsers/src/SAXTemplateHandlers.h" // #include #include // #include #include // using namespace affymetrix_calvin_template; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; XERCES_CPP_NAMESPACE_USE; /* * Initialize the class. */ TemplateFileReader::TemplateFileReader() { } /* * Clear the data. */ TemplateFileReader::~TemplateFileReader() { } /* * Read the entire file using the XML SAX parser. 
*/ bool TemplateFileReader::Read(const std::string &fileName, affymetrix_calvin_template::TemplateData &templateData, bool headerOnly) { templateData.Clear(); // Initialize the XML4C2 system try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } bool status = false; SAXParser* parser = new SAXParser; parser->setValidationScheme(SAXParser::Val_Never); parser->setLoadExternalDTD(false); parser->setDoNamespaces(false); parser->setDoSchema(false); parser->setValidationSchemaFullChecking(false); SAXTemplateHandlers handler(&templateData, headerOnly); parser->setDocumentHandler(&handler); parser->setErrorHandler(&handler); try { parser->parse(fileName.c_str()); int errorCount = parser->getErrorCount(); if (errorCount == 0) { status = true; fileVersionNumber = handler.FileVersionNumber(); } } catch (SAXTemplateStopParsingException) { status = true; fileVersionNumber = handler.FileVersionNumber(); } catch (...) { status = false; } delete parser; XMLPlatformUtils::Terminate(); return status; } /* * Check if the data type matches what is in the file. */ bool TemplateFileReader::IsFileType(const std::string &fileName, const affymetrix_calvin_utilities::AffymetrixGuidType &dataTypeId) { return (TemplateFileReader::DataTypeIdentifier(fileName) == dataTypeId); } /* * Read just the first few entries to determine if this file is * of the right type. Check the magic number, version number * and data type identifier. If they all match then this is the right * type of file. 
*/ affymetrix_calvin_utilities::AffymetrixGuidType TemplateFileReader::DataTypeIdentifier(const std::string &fileName) { TemplateFileReader reader; TemplateData templateData; reader.Read(fileName, templateData, true); return templateData.DataTypeIdentifier(); } affxparser/src/fusion/calvin_files/parsers/src/TemplateFileReader.h0000644000175200017520000000513114516003651026603 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TemplateFileReader_HEADER_ #define _TemplateFileReader_HEADER_ /*! \file TemplateFileReader.h This file provides interfaces to read a template file. */ #include "calvin_files/parameter/src/Parameter.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/template/src/TemplateData.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include // namespace affymetrix_calvin_io { /*! This class provides interfaces to read a template file. */ class TemplateFileReader { public: /*! Constructor */ TemplateFileReader(); /*! Destructor */ ~TemplateFileReader(); protected: /*! The files version number. */ std::wstring fileVersionNumber; public: /*! 
Reads the entire contents of the file. * * @param fileName The name of the template file to read. * @param templateData The template data to read from the file. * @param headerOnly Flag to indicate that the header line should only be read. */ bool Read(const std::string &fileName, affymetrix_calvin_template::TemplateData &templateData, bool headerOnly=false); /*! Determines if a file is of the specified type. * * @param fileName The name of the file to check. * @param dataTypeId An identifier to the type of data. * @return True if the file matches the type desired. */ static bool IsFileType(const std::string &fileName, const affymetrix_calvin_utilities::AffymetrixGuidType &dataTypeId); /*! The identifier of the type of data stored in the file. * * @param fileName The name of the file to check. * @return The identifier of the type of data. */ static affymetrix_calvin_utilities::AffymetrixGuidType DataTypeIdentifier(const std::string &fileName); }; }; #endif // _TemplateFileReader_HEADER_ affxparser/src/fusion/calvin_files/parsers/src/TextFileReader.cpp0000644000175200017520000000526414516003651026316 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "calvin_files/parsers/src/TextFileReader.h" // #include "util/Fs.h" // #include #include #include // using namespace affymetrix_calvin_io; /* * Initialize the class. */ TextFileReader::TextFileReader() { } /* * Clean up. */ TextFileReader::~TextFileReader() { } /* * Read the entire file, the header and body. */ void TextFileReader::ReadFile(const std::string &fileName, std::map &textData) { textData.clear(); std::ifstream fileStream; Fs::aptOpen(fileStream, fileName); if (!fileStream) { affymetrix_calvin_exceptions::FileNotFoundException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } ReadFileBody(fileStream, textData); fileStream.close(); } /* * Read the contents of the file using the input file stream. */ void TextFileReader::ReadFile(std::ifstream &fileStream, std::map &textData) { ReadFileBody(fileStream, textData); } /* * Read the file body. Each line of the file should contain * either a comment, blank line or name=value parameter. * Comment lines start with a # sign. */ void TextFileReader::ReadFileBody(std::ifstream &fileStream, std::map &textData) { const int MAX_LINE_LENGTH = 1024; char line[MAX_LINE_LENGTH]; std::string name; std::string value; // Read each line and store only the name/value parameters. 
while (fileStream.getline(line, MAX_LINE_LENGTH)) { if (line[0] == '#') continue; while (strlen(line) > 0 && iscntrl(line[strlen(line)-1])) { line[strlen(line)-1] = 0; } char *index = strchr(line, '='); if (index) { value = (index+1); line[ strlen(line) - strlen(index)] = 0; name = line; textData[name] = value; } } } affxparser/src/fusion/calvin_files/parsers/src/TextFileReader.h0000644000175200017520000000455114516003651025761 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TextFileReader_HEADER_ #define _TextFileReader_HEADER_ /*! \file TextFileReader.h This file provides interfaces to store information in a text parameter file. */ #include "calvin_files/parsers/src/FileException.h" // #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class provides interfaces to store text parameter information. */ class TextFileReader { public: /*! Constructor */ TextFileReader(); /*! Destructor */ ~TextFileReader(); protected: /*! Reads the body of the file. * * @param fileStream The file stream. 
* @param textData The parameter name/value pairs. */ void ReadFileBody(std::ifstream &fileStream, std::map &textData); public: /*! Reads the entire contents of the file. * * @param fileName The name of the text file to read. * @param textData The text file parameter name/value pairs. * @exception affymetrix_calvin_exceptions::CFileNotFoundException The file does not exist. */ void ReadFile(const std::string &fileName, std::map &textData); /*! Reads the entire contents of the file. * * @param fileStream The file stream of the open text file. * @param textData The text file parameter name/value pairs. */ void ReadFile(std::ifstream &fileStream, std::map &textData); }; }; #endif // _TextFileReader_HEADER_ affxparser/src/fusion/calvin_files/portability/0000755000175200017520000000000014516003651023030 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/portability/src/0000755000175200017520000000000014516003651023617 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/portability/src/AffymetrixBaseTypes.h0000644000175200017520000000531714516003651027734 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffymetrixBaseTypes_HEADER_ #define _AffymetrixBaseTypes_HEADER_ /*! \file AffymetrixBaseTypes.h This file provides type definitions for atom types. */ #include "R_affx_constants.h" #ifdef WIN64 #include #endif /* Define the type for the Windows platform. */ #ifdef WIN32 #ifndef __MINGW32__ /*! An 8 bit signed number */ typedef char int8_t; /*! A 16 bit signed number */ typedef short int int16_t; /*! A 32 bit signed number */ typedef int int32_t; /*! A 64 bit signed number */ typedef long long int int64_t; /*! An 8 bit unsigned number */ typedef unsigned char u_int8_t; /*! A 16 bit unsigned number */ typedef unsigned short int u_int16_t; /*! A 32 bit unsigned number */ typedef unsigned int u_int32_t; /*! A 64 bit unsigned number */ typedef unsigned long long int u_int64_t; #else // __MINGW32__ /*! An 8 bit unsigned number */ typedef unsigned char u_int8_t; /*! A 16 bit unsigned number */ typedef unsigned short int u_int16_t; /*! A 32 bit unsigned number */ typedef unsigned int u_int32_t; /*! A 64 bit unsigned number */ typedef unsigned long long int u_int64_t; #endif // __MINGW32__ #else // WIN32 /* Use the sys/types.h for non Windows platforms */ #include #ifndef sun #include #else // sun /*! An 8 bit unsigned number */ typedef unsigned char u_int8_t; /*! A 16 bit unsigned number */ typedef unsigned short int u_int16_t; /*! A 32 bit unsigned number */ typedef unsigned int u_int32_t; /*! A 64 bit unsigned number */ typedef unsigned long long int u_int64_t; #endif // sun #endif //WIN32 ////////// /* This is also in "affy-base-types.h" * Be sure to only get it once. (keep them in sync!) * The documentation is there too. 
*/ #ifndef _AFFY_TYPE_PUNNED_ #define _AFFY_TYPE_PUNNED_ union type_punned { float v_float; int v_int32; unsigned int v_uint32; }; #endif #endif // _AffymetrixBaseTypes_HEADER_ affxparser/src/fusion/calvin_files/template/0000755000175200017520000000000014516003651022301 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/template/src/0000755000175200017520000000000014516003651023070 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/template/src/TemplateData.cpp0000644000175200017520000000234114516003651026141 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/template/src/TemplateData.h" // using namespace affymetrix_calvin_template;

/*
 * Nothing to set up; the members start out default-constructed.
 */
TemplateData::TemplateData()
{
}

/*
 * Release the stored template information.
 */
TemplateData::~TemplateData()
{
    Clear();
}

/*
 * Reset every member: both identifiers, the creation date/time, the
 * creator name and the list of user attributes all become empty.
 */
void TemplateData::Clear()
{
    fileId.clear();
    dataTypeId.clear();
    creationDateTime.clear();
    createdBy.clear();
    userAttributes.clear();
}
 affxparser/src/fusion/calvin_files/template/src/TemplateData.h0000644000175200017520000000560214516003651025611 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TemplateData_HEADER_ #define _TemplateData_HEADER_ /*! \file TemplateData.h This file provides interfaces to store information in a template file. */ #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include // namespace affymetrix_calvin_template { /*! This class provides interfaces to store array template information. */ class TemplateData { public: /*! Constructor */ TemplateData(); /*! Destructor */ ~TemplateData(); protected: /*! A unique idendifier for the array set object */ affymetrix_calvin_utilities::AffymetrixGuidType fileId; /*! An identifier to the type of data stored in the file */ affymetrix_calvin_utilities::AffymetrixGuidType dataTypeId; /*! The date and time of initial creation. */ std::wstring creationDateTime; /*!
The user who created the data object. */ std::wstring createdBy; /*! The user attributes */ affymetrix_calvin_parameter::ParameterNameValueDefaultRequiredTypeList userAttributes; public: /*! The unique idendifier for the array set. * @return The unique idendifier for the array set. */ affymetrix_calvin_utilities::AffymetrixGuidType &TemplateFileIdentifier() { return fileId; } /*! The identifier of the type of data stored in the file. * @return The identifier of the type of data. */ affymetrix_calvin_utilities::AffymetrixGuidType &DataTypeIdentifier() { return dataTypeId; } /*! The date and time of initial creation. * @return The creation date and time. */ std::wstring &CreationDateTime() { return creationDateTime; } /*! The user who created the data object. * @return The user name. */ std::wstring &CreatedBy() { return createdBy; } /*! The user attributes. * @return The vector of user attributes. */ affymetrix_calvin_parameter::ParameterNameValueDefaultRequiredTypeList &UserAttributes() { return userAttributes; } /*! Clears the member objects. */ void Clear(); }; }; #endif // _TemplateData_HEADER_ affxparser/src/fusion/calvin_files/template/src/TemplateId.h0000644000175200017520000000215014516003651025267 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TemplateId_HEADER_ #define _TemplateId_HEADER_ /*! \file TemplateId.h This file defines the ID for the template file. */ /*! The identifier for a template file. */ #define TEMPLATE_FILE_TYPE_IDENTIFIER "affymetrix-calvin-template" #endif // _TemplateId_HEADER_ affxparser/src/fusion/calvin_files/utils/0000755000175200017520000000000014516003651021626 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/utils/src/0000755000175200017520000000000014516022540022413 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/utils/src/AffyStlCollectionTypes.h0000644000175200017520000001163614516003651027206 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffyStlCollectionTypes_HEADER_ #define _AffyStlCollectionTypes_HEADER_ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include #include #include #include // /*! 
8 bit signed */ typedef std::vector Int8Vector; typedef std::vector::iterator Int8VectorIt; typedef std::vector::const_iterator Int8VectorConstIt; typedef std::list Int8List; typedef std::list::iterator Int8ListIt; typedef std::list::const_iterator Int8ListConstIt; /*! 16 bit signed */ typedef std::vector Int16Vector; typedef std::vector::iterator Int16VectorIt; typedef std::vector::const_iterator Int16VectorConstIt; typedef std::list Int16List; typedef std::list::iterator Int16ListIt; typedef std::list::const_iterator Int16ListConstIt; /*! 32 bit signed */ typedef std::vector Int32Vector; typedef std::vector::iterator Int32VectorIt; typedef std::vector::const_iterator Int32VectorConstIt; typedef std::list Int32List; typedef std::list::iterator Int32ListIt; typedef std::list::const_iterator Int32ListConstIt; /*! 64 bit signed */ typedef std::vector Int64Vector; typedef std::vector::iterator Int64VectorIt; typedef std::vector::const_iterator Int64VectorConstIt; /*! 8 bit unsigned */ typedef std::vector Uint8Vector; typedef std::vector::iterator Uint8VectorIt; typedef std::vector::const_iterator Uint8VectorConstIt; typedef std::list Uint8List; typedef std::list::iterator Uint8ListIt; typedef std::list::const_iterator Uint8ListConstIt; /*! 16 bit unsigned */ typedef std::vector Uint16Vector; typedef std::vector::iterator Uint16VectorIt; typedef std::vector::const_iterator Uint16VectorConstIt; typedef std::list Uint16List; typedef std::list::iterator Uint16ListIt; typedef std::list::const_iterator Uint16ListConstIt; /*! 32 bit unsigned */ typedef std::vector Uint32Vector; typedef std::vector::iterator Uint32VectorIt; typedef std::vector::const_iterator Uint32VectorConstIt; typedef std::list Uint32List; typedef std::list::iterator Uint32ListIt; typedef std::list::const_iterator Uint32ListConstIt; /*! 
64 bit unsigned */ typedef std::vector Uint64Vector; typedef std::vector::iterator Uint64VectorIt; typedef std::vector::const_iterator Uint64VectorConstIt; typedef std::list Uin64List; typedef std::list::iterator Uint64ListIt; typedef std::list::const_iterator Uint64ListConstIt; /*! float */ typedef std::vector FloatVector; typedef std::vector::iterator FloatVectorIt; typedef std::vector::const_iterator FloatVectorConstIt; typedef std::list FloatList; typedef std::list::iterator FloatListIt; typedef std::list::const_iterator FloatListConstIt; /*! wstring */ typedef std::vector WStringVector; typedef std::vector::iterator WStringVectorIt; typedef std::vector::const_iterator WStringVectorConstIt; typedef std::list WStringList; typedef std::list::iterator WStringListIt; typedef std::list::const_iterator WStringListConstIt; /*! string */ typedef std::vector StringVector; typedef std::vector::iterator StringVectorIt; typedef std::vector::const_iterator StringVectorConstIt; typedef std::list StringList; typedef std::list::iterator StringListIt; typedef std::list::const_iterator StringListConstIt; /*! bool */ typedef std::list BoolList; typedef std::list::iterator BoolListIt; typedef std::list::const_iterator BoolListConstIt; typedef std::vector BoolVector; typedef std::vector::iterator BoolVectorIt; typedef std::vector::const_iterator BoolVectorConstIt; #endif // _AffyStlCollectionTypes_HEADER_ affxparser/src/fusion/calvin_files/utils/src/AffymetrixGuid.cpp0000644000175200017520000000554714516003651026063 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _MSC_VER #include "windows.h" #endif #include "calvin_files/utils/src/AffymetrixGuid.h" // #include "calvin_files/utils/src/checksum.h" // #include #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // ignore deprecated functions warning #include #define getpid _getpid #endif #ifndef _MSC_VER #include #endif using namespace affymetrix_calvin_utilities; /* * Initialize the seed */ void InitializeRandomNumberGenerator() { // Get the current time as a starting point for a seed. u_int64_t seed = (u_int64_t) time(NULL); // Multiply the time by its address and the process id. seed *= (u_int64_t)(&seed); seed *= (u_int64_t)getpid(); // Initialize the random number generator. srand((unsigned int)seed); } /* * Initialize the socket library. */ AffymetrixGuid::AffymetrixGuid() { #ifdef _MSC_VER // initialize Winsock library WSADATA wsaData; WORD wVersionRequested = MAKEWORD(1, 1); WSAStartup(wVersionRequested, &wsaData); #endif InitializeRandomNumberGenerator(); } /* * Clean up the socket library. */ AffymetrixGuid::~AffymetrixGuid() { #ifdef _MSC_VER WSACleanup(); #endif } /* * Create a new guid based on the host name, current time and random numbers. * A checksum of the string values are taken so as to remove any user * interpretive information. This should strictly just be an identifier. 
*/ AffymetrixGuidType AffymetrixGuid::GenerateNewGuid() { const int GUID_LENGTH = 54; char guid[GUID_LENGTH+1]; time_t currentTime; const int MAX_HOST_NAME = 64; char hostname[MAX_HOST_NAME]; currentTime = time(NULL); gethostname(hostname, MAX_HOST_NAME); sprintf(guid, "%10d-%10d-%10d-%10d-%10d", CheckSum::OnesComplementCheckSum(hostname, strlen(hostname)/2), (int) currentTime, rand(), rand(), rand()); guid[GUID_LENGTH] = 0; for (int i=0; i #include #include // namespace affymetrix_calvin_utilities { /*! The GUID type */ typedef std::string AffymetrixGuidType; /*! An STL list of guid types */ typedef std::list AffymetrixGuidTypeList; /*! This class provides functions for creating globally unique identifiers. */ class AffymetrixGuid { public: /*! Constructor */ AffymetrixGuid(); /*! Destructor */ ~AffymetrixGuid(); /*! Generates a new GUID. * * @return The new GUID. */ static AffymetrixGuidType GenerateNewGuid(); }; }; #endif // _AffymetrixGuid_HEADER_ affxparser/src/fusion/calvin_files/utils/src/Calvin.cpp0000644000175200017520000012132514516003651024341 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#include "calvin_files/utils/src/Calvin.h"
//
#include "util/AffxMultiDimensionalArray.h"
#include "util/Fs.h"
#include "util/Util.h"
//
#include
//

/**
 * Ad-hoc developer check: compares two hard-coded .cnchp files with
 * Calvin::equivalent(), ignoring a fixed list of header parameters and
 * SegmentID columns. The result of the comparison is not inspected here.
 *
 * NOTE(review): the template arguments of setIgnore/mapEpsilon and the name of
 * the system header included above are missing in this copy of the file.
 */
void Calvin::test()
{
    Verbose::out(1, "*");
    std::set setIgnore;
    std::map mapEpsilon;

    // Header parameters to ignore
    setIgnore.insert("FileCreationTime");
    setIgnore.insert("FileIdentifier");
    setIgnore.insert("program-version");
    setIgnore.insert("create_date");
    setIgnore.insert("create-date");
    setIgnore.insert("affymetrix-algorithm-param-option-verbose");
    setIgnore.insert("affymetrix-algorithm-param-option-exec-guid");
    setIgnore.insert("affymetrix-algorithm-param-option-program-cvs-id");
    setIgnore.insert("affymetrix-algorithm-param-option-version-to-report");
    setIgnore.insert("affymetrix-algorithm-param-option-command-line");
    setIgnore.insert("affymetrix-algorithm-param-option-run-probeset-genotype");
    setIgnore.insert("affymetrix-algorithm-param-option-cels");
    setIgnore.insert("affymetrix-algorithm-param-option-out-dir");
    setIgnore.insert("affymetrix-algorithm-param-start-time-start");
    setIgnore.insert("affymetrix-algorithm-param-start-free-mem-at-start");

    // Data Set columns to ignore (Specify as Group.Set.Column).
    setIgnore.insert("Segments.CN.SegmentID");
    setIgnore.insert("Segments.LOH.SegmentID");
    setIgnore.insert("Segments.CNNeutralLOH.SegmentID");
    setIgnore.insert("Segments.NormalDiploid.SegmentID");
    setIgnore.insert("Segments.Mosaicism.SegmentID");
    setIgnore.insert("Segments.NoCall.SegmentID");

    // Hard-coded local paths: file 1 is the freshly generated output, file 2 the stored regression data.
    AffxString strFileName1 = "c:/apt/affy/sdk/copynumber/apt-copynumber-workflow/regression/test-generated/GenomeWideSNP_6/copynumber_HapMap270_6_cels/NA07029_GW6_C.CN5.cnchp";
    AffxString strFileName2 = "c:/apt/affy/sdk/regression-data/data/copy-number/GenomeWideSNP_6/copynumber_HapMap270_6_cels/NA07029_GW6_C.CN5.cnchp";

    // NOTE(review): with six arguments the trailing `false` lands on the parameter
    // that follows fEpsilon in the file-name overload defined later in this file,
    // i.e. dCorrelationCutoff (a double), not bCheckHeader - confirm this is intended.
    Calvin::equivalent(strFileName1, strFileName2, setIgnore, mapEpsilon, (float)0.0001, false);
    Verbose::out(1, "*");
}

/**
 * Read the Calvin file, and store the data in memory.
 *
 * The file header and every parent header are copied via loadHeader(); then
 * each data group / data set is walked and every cell is converted to its
 * string form and stored as a CalvinValue tagged with the column type.
 *
 * @param strFileName - The name of the Calvin file to load data from.
 * @return - bool value. (true if successful). Any exception raised while
 *           reading is caught and reported through Err::errAbort().
 */
bool Calvin::load(const AffxString& strFileName)
{
    bool bSuccessful = false;
    AffxString strValue;

    // One scratch variable per column type handled by the switch below.
    int8_t c = 0;
    unsigned char uc = 0;
    short n = 0;
    unsigned short un = 0;
    int i = 0;
    unsigned int ui = 0;
    float f = 0;
    std::wstring wstr;

    // Drop whatever was loaded previously.
    clear();
    affymetrix_calvin_io::GenericFileReader reader;
    affymetrix_calvin_io::GenericData genericData;
    try
    {
        m_strFileName = strFileName;
        // Verbose::out(1, "*");
        Verbose::out(1, "Reading Calvin file: " + m_strFileName);
        reader.SetFilename(m_strFileName);
        reader.Open(genericData);

        // The creation time is stored as an extra parameter ahead of the ones loadHeader() adds.
        getCalvinHeader().add(new CalvinParameter("FileCreationTime", CalvinValue::TextType, StringUtils::ConvertWCSToMBS(genericData.Header().GetGenericDataHdr()->GetFileCreationTime())));
        affymetrix_calvin_io::GenericDataHeader* pHeader = genericData.Header().GetGenericDataHdr();
        loadHeader(*pHeader, getCalvinHeader());

        // One CalvinHeader per parent header, in file order.
        int iParentCount = genericData.Header().GetGenericDataHdr()->GetParentCnt();
        for (int iParentIndex = 0; (iParentIndex < iParentCount); iParentIndex++)
        {
            // Note: this pHeader shadows the GenericDataHeader* declared above.
            CalvinHeader* pHeader = new CalvinHeader;
            getParentCalvinHeaders().add(pHeader);
            affymetrix_calvin_io::GenericDataHeader Header = genericData.Header().GetGenericDataHdr()->GetParent(iParentIndex);
            loadHeader(Header, *pHeader);
        }

        // The generic header has been copied; clear it before reading the data sets.
        genericData.Header().GetGenericDataHdr()->Clear();

        WStringVector vDataGroupNames;
        genericData.DataGroupNames(vDataGroupNames);
        for (unsigned int uiDataGroupIndex = 0; (uiDataGroupIndex < vDataGroupNames.size()); uiDataGroupIndex++)
        {
            CalvinDataGroup* pGroup = new CalvinDataGroup;
            getCalvinDataGroups().add(pGroup);
            pGroup->setName(StringUtils::ConvertWCSToMBS(vDataGroupNames[uiDataGroupIndex]));

            WStringVector vDataSetNames;
            genericData.DataSetNames(uiDataGroupIndex, vDataSetNames);
            for (unsigned int uiDataSetIndex = 0; (uiDataSetIndex < vDataSetNames.size()); uiDataSetIndex++)
            {
                CalvinDataSet* pSet = new CalvinDataSet;
                pGroup->getCalvinDataSets().add(pSet);
                affymetrix_calvin_io::DataSet* pDataSet = genericData.DataSet(uiDataGroupIndex, uiDataSetIndex);
                pDataSet->Open();
                pSet->setName(StringUtils::ConvertWCSToMBS(vDataSetNames[uiDataSetIndex]));

                // Column names first ...
                for (int uiColIndex = 0; (uiColIndex < pDataSet->Cols()); uiColIndex++)
                {
                    pSet->getColumnHeaders().add(new AffxString(StringUtils::ConvertWCSToMBS(pDataSet->Header().GetColumnInfo(uiColIndex).GetName())));
                }

                // ... then every row, cell by cell.
                pSet->getCalvinValues().reserve(pDataSet->Rows());
                for (int uiRowIndex = 0; (uiRowIndex < pDataSet->Rows()); uiRowIndex++)
                {
                    AffxArray* pRow = new AffxArray(0, pDataSet->Cols());
                    pSet->getCalvinValues().add(pRow);
                    for (int uiColIndex = 0; (uiColIndex < pDataSet->Cols()); uiColIndex++)
                    {
                        // Cleared so that an unhandled column type yields an empty string.
                        strValue.clear();
                        switch (pDataSet->Header().GetColumnInfo(uiColIndex).GetColumnType())
                        {
                        case CalvinValue::Int8Type: pDataSet->GetData(uiRowIndex, uiColIndex, c); strValue = ::getInt(c); break;
                        case CalvinValue::UInt8Type: pDataSet->GetData(uiRowIndex, uiColIndex, uc); strValue = ::getInt(uc); break;
                        case CalvinValue::Int16Type: pDataSet->GetData(uiRowIndex, uiColIndex, n); strValue = ::getInt(n); break;
                        case CalvinValue::UInt16Type: pDataSet->GetData(uiRowIndex, uiColIndex, un); strValue = ::getInt(un); break;
                        case CalvinValue::Int32Type: pDataSet->GetData(uiRowIndex, uiColIndex, i); strValue = ::getInt(i); break;
                        case CalvinValue::UInt32Type: pDataSet->GetData((int)uiRowIndex, (int)uiColIndex, ui); strValue = ::getUnsignedInt(ui); break;
                        case CalvinValue::FloatType: pDataSet->GetData(uiRowIndex, uiColIndex, f); strValue = ::getDouble(f, 10); break;
                        // NOTE(review): 7 and 8 are read as narrow and wide strings respectively;
                        // presumably the ASCII/text column type codes - confirm against CalvinValue.
                        case 7: pDataSet->GetData(uiRowIndex, uiColIndex, strValue); break;
                        case 8: pDataSet->GetData(uiRowIndex, uiColIndex, wstr); strValue = StringUtils::ConvertWCSToMBS(wstr); break;
                        default: break;
                        }
                        pRow->add(new CalvinValue((CalvinValue::ParameterType)pDataSet->Header().GetColumnInfo(uiColIndex).GetColumnType(), strValue));
                    }
                }
                pDataSet->Close();
                pDataSet->Delete();
            }
        }
        genericData.Clear();
        bSuccessful = true;
    }
    // Any failure, whatever its type, is reported as an unreadable file.
    catch(...) {genericData.Clear(); Err::errAbort("Cannot read Calvin file: " + strFileName);}
    return bSuccessful;
}

/**
 * Load the Calvin header into memory.
 * @param Header - A pointer to the affymetrix_calvin_io::GenericDataHeader for the specified Calvin file.
*/ void Calvin::loadHeader(affymetrix_calvin_io::GenericDataHeader& Header, CalvinHeader& header) { AffxString strValue; header.reserve(Header.GetNameValParamCnt() + 4); //File header header.add(new CalvinParameter("FileIdentifier", CalvinValue::AsciiType, Header.GetFileId())); header.add(new CalvinParameter("FileTypeIdentifier", CalvinValue::AsciiType, Header.GetFileTypeId())); header.add(new CalvinParameter("FileLocale", CalvinValue::TextType, StringUtils::ConvertWCSToMBS(Header.GetLocale()))); // Header Parameters int iParamCount = Header.GetNameValParamCnt(); affymetrix_calvin_parameter::ParameterNameValueType param; for (int iParamIndex = 0; (iParamIndex < iParamCount); iParamIndex++) { param = Header.GetNameValParam(iParamIndex); strValue.clear(); switch (param.GetParameterType()) { case CalvinValue::Int8Type: strValue = ::getInt(param.GetValueInt8()); break; case CalvinValue::UInt8Type: strValue = ::getInt(param.GetValueUInt8()); break; case CalvinValue::Int16Type: strValue = ::getInt(param.GetValueInt16()); break; case CalvinValue::UInt16Type: strValue = ::getInt(param.GetValueUInt16()); break; case CalvinValue::Int32Type: strValue = ::getInt(param.GetValueInt32()); break; case CalvinValue::UInt32Type: strValue = ::getUnsignedInt(param.GetValueUInt32()); break; case CalvinValue::FloatType: strValue = ::getDouble(param.GetValueFloat(), 10); break; case CalvinValue::AsciiType: strValue = param.GetValueAscii(); break; case CalvinValue::TextType: strValue = StringUtils::ConvertWCSToMBS(param.GetValueText()); break; default: break; } header.add(new CalvinParameter(StringUtils::ConvertWCSToMBS(param.GetName()), (CalvinValue::ParameterType)param.GetParameterType(), strValue)); } } bool Calvin::equivalent(Calvin& that, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, bool bCheckHeader, float fFraction) { if (bCheckHeader) { if (!equivalentHeader("File ", getCalvinHeader(), that.getCalvinHeader(), setIgnore, mapEpsilon, fEpsilon, fFraction)) { return 
false; } if (getParentCalvinHeaders().getCount() != that.getParentCalvinHeaders().getCount()) { Verbose::out(1, "Files do not have the same number of Parent Headers."); return false; } for (int iParentIndex = 0; (iParentIndex < getParentCalvinHeaders().getCount()); iParentIndex++) { if (!equivalentHeader("Parent " + ::getInt(iParentIndex + 1) + " ", *this->getParentCalvinHeaders().getAt(iParentIndex), *that.getParentCalvinHeaders().getAt(iParentIndex), setIgnore, mapEpsilon, fEpsilon, fFraction)) { return false; } } } if (this->getCalvinDataGroups().getCount() != that.getCalvinDataGroups().getCount()) { Verbose::out(1, "Files do not have the same number of Data Groups."); return false; } for (int iGroupIndex = 0; (iGroupIndex < getCalvinDataGroups().getCount()); iGroupIndex++) { CalvinDataGroup* pThisGroup = this->getCalvinDataGroups().getAt(iGroupIndex); CalvinDataGroup* pThatGroup = that.getCalvinDataGroups().getAt(iGroupIndex); if (pThisGroup->getName() != pThatGroup->getName()) { Verbose::out(1, "File Data Group name mismatch: " + pThisGroup->getName() + " != " + pThatGroup->getName()); return false; } if (pThisGroup->getCalvinDataSets().getCount() != pThatGroup->getCalvinDataSets().getCount()) { Verbose::out(1, "Data Group " + pThisGroup->getName() + " does not have the same number of Data Sets."); return false; } for (int iSetIndex = 0; (iSetIndex < pThisGroup->getCalvinDataSets().getCount()); iSetIndex++) { CalvinDataSet* pThisSet = pThisGroup->getCalvinDataSets().getAt(iSetIndex); CalvinDataSet* pThatSet = pThatGroup->getCalvinDataSets().getAt(iSetIndex); if (pThisSet->getName() != pThatSet->getName()) { Verbose::out(1, "File Data Set name mismatch: " + pThisGroup->getName() + "." + pThisSet->getName() + " != " + pThatGroup->getName() + "." + pThatSet->getName()); return false; } if (pThisSet->getColumnHeaders().getCount() != pThatSet->getColumnHeaders().getCount()) { Verbose::out(1, "Data Set " + pThisGroup->getName() + "." 
+ pThisSet->getName() + " does not have the same number of Columns."); return false; } if (pThisSet->getCalvinValues().getCount() != pThatSet->getCalvinValues().getCount()) { Verbose::out(1, "Data Set " + pThisGroup->getName() + "." + pThisSet->getName() + " does not have the same number of Rows."); return false; } for (int iColIndex = 0; (iColIndex < pThisSet->getColumnHeaders().getCount()); iColIndex++) { if (*pThisSet->getColumnHeaders().getAt(iColIndex) != *pThatSet->getColumnHeaders().getAt(iColIndex)) { Verbose::out(1, "File Data Set column name mismatch: " + pThisGroup->getName() + "." + pThisSet->getName() + "." + *pThisSet->getColumnHeaders().getAt(iColIndex) + " != " + pThatGroup->getName() + "." + pThatSet->getName() + "." + *pThatSet->getColumnHeaders().getAt(iColIndex)); return false; } } for (int iRowIndex = 0; (iRowIndex < pThisSet->getCalvinValues().getCount()); iRowIndex++) { if (pThisSet->getCalvinValues().getAt(iRowIndex)->getCount() != pThatSet->getCalvinValues().getAt(iRowIndex)->getCount()) { Verbose::out(1, "Data Set " + pThisGroup->getName() + "." + pThisSet->getName() + " is malformed."); return false; } for (int iColIndex = 0; (iColIndex < pThisSet->getCalvinValues().getAt(iRowIndex)->getCount()); iColIndex++) { CalvinValue* pThis = pThisSet->getCalvinValues().getAt(iRowIndex)->getAt(iColIndex); CalvinValue* pThat = pThatSet->getCalvinValues().getAt(iRowIndex)->getAt(iColIndex); if (pThis->getParameterType() != pThat->getParameterType()) { Verbose::out(1, "Value Type mismatch for Data Set Column " + pThisGroup->getName() + "." + pThisSet->getName() + "." 
+ *pThisSet->getColumnHeaders().getAt(iColIndex)); return false; } if (pThis->getParameterType() == CalvinValue::FloatType) { float fThis = (float)::getDouble(pThis->getValue()); float fThat = (float)::getDouble(pThat->getValue()); // allowed absolute difference from fractional tolerance (zero by default) float fEpsilon2 = fFraction*Max( fabs(fThis), fabs(fThat) ); // absolute difference is acceptable if it satisfies either (least restrictive) tolerance if (fabs(fThis - fThat) > Max(fEpsilon,fEpsilon2) ) { Verbose::out(1, "Value is out of spec. for Data Set Column " + pThisGroup->getName() + "." + pThisSet->getName() + "." + *pThisSet->getColumnHeaders().getAt(iColIndex) + "\tDifference = " + ::getDouble(fabs(fThis - fThat), 10)); return false; } } else if (pThis->getValue() != pThat->getValue()) { Verbose::out(1, "Value mismatch for Data Set Column " + pThisGroup->getName() + "." + pThisSet->getName() + "." + *pThisSet->getColumnHeaders().getAt(iColIndex) + "\t" + pThis->getValue() + " != " + pThat->getValue()); return false; } } } } } return true; } bool Calvin::equivalentHeader(const AffxString& strPrompt, CalvinHeader& headerThis, CalvinHeader& headerThat, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, float fFraction) { if (headerThis.getCount() != headerThat.getCount()) { Verbose::out(1, "WARNING: " + strPrompt + "Header does not have the same number of Parameters."); // return false; } // Note that this check allows the 'that' header to have more items in it than the 'this' header. // Assuming that the 'this' header is the gold data and the 'that' header is the test data. 
headerThat.quickSort(0); // By Parameter Name for (int iIndex = 0; (iIndex < headerThis.getCount()); iIndex++) { CalvinParameter* pThis = headerThis.getAt(iIndex); int iSearchIndex = headerThat.binarySearch(*pThis, 0); // By Parameter Name if (iSearchIndex != -1) { CalvinParameter* pThat = headerThat.getAt(iSearchIndex); if (!equivalentParameter(strPrompt, *pThis, *pThat, setIgnore, mapEpsilon, fEpsilon, fFraction)) { return false; } } else { Verbose::out(1, strPrompt + "Header parameter " + pThis->getName() + " is missing."); return false; } } return true; } bool Calvin::equivalentParameter(const AffxString& strPrompt, CalvinParameter& This, CalvinParameter& That, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, float fFraction) { if (This.getName() != That.getName()) { Verbose::out(1, strPrompt + "Header Parameter name mismatch: " + This.getName() + " != " + That.getName()); return false; } if (This.getParameterType() != That.getParameterType()) { Verbose::out(1, strPrompt + "Header Parameter Type mismatch for parameter: " + This.getName()); return false; } if (setIgnore.find(This.getName()) == setIgnore.end()) { float fEps = fEpsilon; if (mapEpsilon.find(This.getName()) != mapEpsilon.end()) {fEps = mapEpsilon.find(This.getName())->second;} if (This.getParameterType() == CalvinValue::FloatType) { float fThis = (float)::getDouble(This.getValue()); float fThat = (float)::getDouble(That.getValue()); // allowed absolute difference from fractional tolerance (zero by default) float fEpsilon2 = fFraction*Max( fabs(fThis), fabs(fThat) ); // absolute difference is acceptable if it satisfies either (least restrictive) tolerance if (fabs(fThis - fThat) > Max(fEps,fEpsilon2)) { Verbose::out(1, strPrompt + "Header Parameter Value is out of spec. 
for " + This.getName() + "\tDifference = " + ::getDouble(fabs(fThis - fThat), 10) + "\tEpsilon = " + ::getDouble(fEps, 10)); return false; } } else if (This.getValue() != That.getValue()) { Verbose::out(1, strPrompt + "Header Parameter Value mismatch for " + This.getName() + "\t" + This.getValue() + " != " + That.getValue()); return false; } } return true; } // We have two functions named equivalent with different signatures. Extra functionality of setSetIgnore was added. // The signatures are maintained for backwards compatibility. bool Calvin::equivalent( const AffxString& strFileName1, const AffxString& strFileName2, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, double dCorrelationCutoff, bool bCheckHeader, int iMessageLimit, float fFraction) { std::set setSetIgnore; return equivalent( strFileName1, strFileName2, setIgnore, setSetIgnore, mapEpsilon, fEpsilon, dCorrelationCutoff, bCheckHeader, iMessageLimit, fFraction); } bool Calvin::equivalent( const AffxString& strFileName1, const AffxString& strFileName2, std::set& setIgnore, std::set& setSetIgnore, std::map& mapEpsilon, float fEpsilon, double dCorrelationCutoff, bool bCheckHeader, int iMessageLimit, float fFraction) { Verbose::out(1, "Comparing " + strFileName1 + " and " + strFileName2); bool bSuccessful = true; bool bFileOpen1 = false; bool bFileOpen2 = false; AffxString str1; AffxString str2; int8_t c1 = 0; int8_t c2 = 0; unsigned char uc1 = 0; unsigned char uc2 = 0; short n1 = 0; short n2 = 0; unsigned short un1 = 0; unsigned short un2 = 0; int i1 = 0; int i2 = 0; unsigned int ui1 = 0; unsigned int ui2 = 0; float f1 = 0; float f2 = 0; float fEpsilon2 = 0; std::wstring wstr1; std::wstring wstr2; affymetrix_calvin_io::GenericFileReader reader1; affymetrix_calvin_io::GenericData genericData1; affymetrix_calvin_io::GenericFileReader reader2; affymetrix_calvin_io::GenericData genericData2; affymetrix_calvin_io::DataSet* pDataSet1 = NULL; affymetrix_calvin_io::DataSet* pDataSet2 = NULL; try { 
reader1.SetFilename(Fs::convertToUncPath(strFileName1)); reader1.Open(genericData1); bFileOpen1 = true; // remember status for file exception catch block //TODO file exception try/catch block for file1 should move here per Harley reader2.SetFilename(Fs::convertToUncPath(strFileName2)); reader2.Open(genericData2); bFileOpen2 = true; // remember status for file exception catch block //TODO file exception try/catch block for file2 should move here per Harley if (bCheckHeader) { CalvinParameter param1("FileCreationTime", CalvinValue::TextType, StringUtils::ConvertWCSToMBS(genericData1.Header().GetGenericDataHdr()->GetFileCreationTime())); CalvinParameter param2("FileCreationTime", CalvinValue::TextType, StringUtils::ConvertWCSToMBS(genericData2.Header().GetGenericDataHdr()->GetFileCreationTime())); if (!equivalentParameter("File ", param1, param1, setIgnore, mapEpsilon, fEpsilon, fFraction)) { return false; } affymetrix_calvin_io::GenericDataHeader* pHeader1 = genericData1.Header().GetGenericDataHdr(); affymetrix_calvin_io::GenericDataHeader* pHeader2 = genericData2.Header().GetGenericDataHdr(); if (!equivalentHeader("File ", *pHeader1, *pHeader2, setIgnore, mapEpsilon, fEpsilon, fFraction)) { return false; } int iParentCount1 = genericData1.Header().GetGenericDataHdr()->GetParentCnt(); int iParentCount2 = genericData2.Header().GetGenericDataHdr()->GetParentCnt(); int iParentCount = Min(iParentCount1, iParentCount2); if (iParentCount1 != iParentCount2) { Verbose::out(1, "Files do not have the same number of Parent Headers."); return false; } for (int iParentIndex = 0; (iParentIndex < iParentCount); iParentIndex++) { affymetrix_calvin_io::GenericDataHeader Header1 = genericData1.Header().GetGenericDataHdr()->GetParent(iParentIndex); affymetrix_calvin_io::GenericDataHeader Header2 = genericData2.Header().GetGenericDataHdr()->GetParent(iParentIndex); if (!equivalentHeader("Parent " + ::getInt(iParentIndex + 1) + " ", Header1, Header2, setIgnore, mapEpsilon, fEpsilon, 
fFraction)) { bSuccessful = false; } } } genericData1.Header().GetGenericDataHdr()->Clear(); genericData2.Header().GetGenericDataHdr()->Clear(); WStringVector vDataGroupNames1; WStringVector vDataGroupNames2; genericData1.DataGroupNames(vDataGroupNames1); genericData2.DataGroupNames(vDataGroupNames2); unsigned int uiDataGroupCount = Min(vDataGroupNames1.size(), vDataGroupNames2.size()); if (vDataGroupNames1.size() != vDataGroupNames2.size()) { Verbose::out(1, "Files do not have the same number of Data Groups. Gold = " + ::getInt(vDataGroupNames1.size()) + ", generated = " + ::getInt(vDataGroupNames2.size())); bSuccessful = false; } for (unsigned int uiDataGroupIndex = 0; (uiDataGroupIndex < uiDataGroupCount); uiDataGroupIndex++) { if (vDataGroupNames1[uiDataGroupIndex] != vDataGroupNames2[uiDataGroupIndex]) { Verbose::out(1, "File Data Group name mismatch: " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + " != " + StringUtils::ConvertWCSToMBS(vDataGroupNames2[uiDataGroupIndex])); bSuccessful = false; continue; } WStringVector vDataSetNames1; WStringVector vDataSetNames2; genericData1.DataSetNames(uiDataGroupIndex, vDataSetNames1); genericData2.DataSetNames(uiDataGroupIndex, vDataSetNames2); unsigned int uiDataSetCount = Min(vDataSetNames1.size(), vDataSetNames2.size()); if (vDataSetNames1.size() != vDataSetNames2.size()) { Verbose::out(1, "Data Group " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + " does not have the same number of Data Sets."); bSuccessful = false; continue; } for (unsigned int uiDataSetIndex = 0; (uiDataSetIndex < uiDataSetCount); uiDataSetIndex++) { //Verbose::out(1, "Data Set " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex])); pDataSet1 = genericData1.DataSet(uiDataGroupIndex, uiDataSetIndex); pDataSet2 = genericData2.DataSet(uiDataGroupIndex, uiDataSetIndex); pDataSet1->Open(); pDataSet2->Open(); if (vDataSetNames1[uiDataSetIndex] != vDataSetNames2[uiDataSetIndex]) { 
Verbose::out(1, "File Data Set name mismatch: " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + " != " + StringUtils::ConvertWCSToMBS(vDataGroupNames2[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames2[uiDataSetIndex])); pDataSet1->Delete(); pDataSet2->Delete(); bSuccessful = false; continue; } //MG AffxString strSetName = StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) ; if (setSetIgnore.find(strSetName) != setSetIgnore.end()) { Verbose::out(1, "Not doing a comparison of the DataSet" + strSetName); continue; } if (pDataSet1->Cols() != pDataSet2->Cols()) { Verbose::out(1, "Data Set " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + " does not have the same number of Columns. Gold = " + ::getInt(pDataSet1->Cols()) + ", Generated = " + ::getInt(pDataSet2->Cols())); pDataSet1->Delete(); pDataSet2->Delete(); bSuccessful = false; continue; } if (pDataSet1->Rows() != pDataSet2->Rows()) { Verbose::out(1, "Data Set " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + " does not have the same number of Rows. Gold = " + ::getInt(pDataSet1->Rows()) + ", Generated = " + ::getInt(pDataSet2->Rows())); pDataSet1->Delete(); pDataSet2->Delete(); bSuccessful = false; continue; } AffxMultiDimensionalArray vSkip(pDataSet1->Cols()); AffxMultiDimensionalArray vEpsilon(pDataSet1->Cols()); for (int uiColIndex = 0; (uiColIndex < pDataSet1->Cols()); uiColIndex++) { if (pDataSet1->Header().GetColumnInfo(uiColIndex).GetName() != pDataSet2->Header().GetColumnInfo(uiColIndex).GetName()) { Verbose::out(1, "File Data Set column name mismatch: " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." 
+ StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + "." + StringUtils::ConvertWCSToMBS(pDataSet1->Header().GetColumnInfo(uiColIndex).GetName()) + " != " + StringUtils::ConvertWCSToMBS(vDataGroupNames2[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames2[uiDataSetIndex]) + "." + StringUtils::ConvertWCSToMBS(pDataSet2->Header().GetColumnInfo(uiColIndex).GetName())); // pDataSet1->Delete(); // pDataSet2->Delete(); bSuccessful = false; continue; } else { AffxString strColumnName = StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + "." + StringUtils::ConvertWCSToMBS(pDataSet1->Header().GetColumnInfo(uiColIndex).GetName()); if (setIgnore.find(strColumnName) != setIgnore.end()) {vSkip.set(uiColIndex, true);} if (mapEpsilon.find(strColumnName) != mapEpsilon.end()) {vEpsilon.set(uiColIndex, mapEpsilon.find(strColumnName)->second);} else {vEpsilon.set(uiColIndex, fEpsilon);} } } AffxMultiDimensionalArray vMaxDifference(pDataSet1->Cols()); AffxMultiDimensionalArray vMaxDifferenceRow(pDataSet1->Cols()); int iMessageCount = 0; for (int uiColIndex = 0; (uiColIndex < pDataSet1->Cols()); uiColIndex++) { //Verbose::out(1, "Column: " + StringUtils::ConvertWCSToMBS(pDataSet1->Header().GetColumnInfo(uiColIndex).GetName()) + "\t" + ::getInt(pDataSet1->Header().GetColumnInfo(uiColIndex).GetColumnType())); int iFailureCount = 0; AffxMultiDimensionalArray mx(pDataSet1->Rows(), 2); for (int uiRowIndex = 0; (uiRowIndex < pDataSet1->Rows()); uiRowIndex++) { if (pDataSet1->Header().GetColumnInfo(uiColIndex).GetColumnType() != pDataSet2->Header().GetColumnInfo(uiColIndex).GetColumnType()) { Verbose::out(1, "Value Type mismatch for Data Set Column " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + "." 
+ StringUtils::ConvertWCSToMBS(pDataSet1->Header().GetColumnInfo(uiColIndex).GetName()) + ", Gold = " + ::getInt(pDataSet1->Header().GetColumnInfo(uiColIndex).GetColumnType()) + ", Generated = " + ::getInt(pDataSet2->Header().GetColumnInfo(uiColIndex).GetColumnType())); // pDataSet1->Delete(); // pDataSet2->Delete(); bSuccessful = false; break; } if (vSkip.get(uiColIndex)) {continue;} bool bEquals = false; switch (pDataSet1->Header().GetColumnInfo(uiColIndex).GetColumnType()) { case CalvinValue::Int8Type: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, c1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, c2); bEquals = (c1 == c2); break; case CalvinValue::UInt8Type: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, uc1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, uc2); bEquals = (uc1 == uc2); break; case CalvinValue::Int16Type: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, n1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, n2); f1 = (float)n1; f2 = (float)n2; if ((Util::isFinite(f1)) && (Util::isFinite(f2))) { mx.set(uiRowIndex, 0, f1); mx.set(uiRowIndex, 1, f2); } if ((f1 != f1) && (f2 != f2)) {bEquals = true;} // Both = NaN else {bEquals = (fabs(f1 - f2) <= vEpsilon.get(uiColIndex));} break; case CalvinValue::UInt16Type: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, un1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, un2); f1 = (float)un1; f2 = (float)un2; if ((Util::isFinite(f1)) && (Util::isFinite(f2))) { mx.set(uiRowIndex, 0, f1); mx.set(uiRowIndex, 1, f2); } if ((f1 != f1) && (f2 != f2)) {bEquals = true;} // Both = NaN else {bEquals = (fabs(f1 - f2) <= vEpsilon.get(uiColIndex));} break; case CalvinValue::Int32Type: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, i1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, i2); f1 = (float)i1; f2 = (float)i2; if ((Util::isFinite(f1)) && (Util::isFinite(f2))) { mx.set(uiRowIndex, 0, f1); mx.set(uiRowIndex, 1, f2); } if ((f1 != f1) && (f2 != f2)) {bEquals = 
true;} // Both = NaN else {bEquals = (fabs(f1 - f2) <= vEpsilon.get(uiColIndex));} break; case CalvinValue::UInt32Type: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, ui1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, ui2); f1 = (float)ui1; f2 = (float)ui2; if ((Util::isFinite(f1)) && (Util::isFinite(f2))) { mx.set(uiRowIndex, 0, f1); mx.set(uiRowIndex, 1, f2); } if ((f1 != f1) && (f2 != f2)) {bEquals = true;} // Both = NaN else {bEquals = (fabs(f1 - f2) <= vEpsilon.get(uiColIndex));} break; case CalvinValue::FloatType: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, f1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, f2); fEpsilon2 = fFraction*Max( fabs(f1), fabs(f2) ); if ((Util::isFinite(f1)) && (Util::isFinite(f2))) { mx.set(uiRowIndex, 0, f1); mx.set(uiRowIndex, 1, f2); } if ((f1 != f1) && (f2 != f2)) {bEquals = true;} // Both = NaN else {bEquals = (fabs(f1 - f2) <= Max(vEpsilon.get(uiColIndex),fEpsilon2));} break; case 7: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, str1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, str2); bEquals = (str1 == str2); break; case 8: pDataSet1->GetData((int)uiRowIndex, (int)uiColIndex, wstr1); pDataSet2->GetData((int)uiRowIndex, (int)uiColIndex, wstr2); bEquals = (wstr1 == wstr2); break; default: break; } if (!bEquals) { if ((pDataSet1->Header().GetColumnInfo(uiColIndex).GetColumnType() >= 2) && (pDataSet1->Header().GetColumnInfo(uiColIndex).GetColumnType() <= 6)) { iFailureCount++; if (fabs(f1 - f2) > vMaxDifference.get(uiColIndex)) { vMaxDifference.set(uiColIndex, fabs(f1 - f2)); vMaxDifferenceRow.set(uiColIndex, uiRowIndex); } } else { bSuccessful = false; iMessageCount++; if (iMessageCount == (iMessageLimit + 1)) { Verbose::out(1, "Message limit reached. Additional messages will be suppressed."); } else if (iMessageCount <= iMessageLimit) { Verbose::out(1, "Value mismatch for Data Set Column " +StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." 
+ StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + "." + StringUtils::ConvertWCSToMBS(pDataSet1->Header().GetColumnInfo(uiColIndex).GetName()) + " at row " + ::getUnsignedInt(uiRowIndex)); } } } } if (vMaxDifference.get(uiColIndex) > 0) { double dCorrelation = mx.corr(); if ((dCorrelation < dCorrelationCutoff) || (dCorrelationCutoff == 1)) { bSuccessful = false; Verbose::out(1, "Value is out of spec. for Data Set Column " + StringUtils::ConvertWCSToMBS(vDataGroupNames1[uiDataGroupIndex]) + "." + StringUtils::ConvertWCSToMBS(vDataSetNames1[uiDataSetIndex]) + "." + StringUtils::ConvertWCSToMBS(pDataSet1->Header().GetColumnInfo(uiColIndex).GetName()) + " MaxDifference at row " + ::getUnsignedInt(vMaxDifferenceRow.get(uiColIndex)) + "\tMaxDifference = " + ::getDouble(vMaxDifference.get(uiColIndex), 10) + "\tNumberFailures = " + ::getInt(iFailureCount) + "\tEpsilon = " + ::getDouble(vEpsilon.get(uiColIndex), 10) + "\tCorrelation = " + ::getDouble(dCorrelation, 10) + "\tNormDifference = " + ::getDouble(mx.normDifference(), 10)); } } } pDataSet1->Delete(); pDataSet2->Delete(); } } if (bSuccessful) { Verbose::out(1, "Files are equivalent."); } } /* When things go wrong see if we can die gracefully here. */ catch(Except &e) { bSuccessful = false; Verbose::out(1, "Exception caught. " "Message is: " + ToStr(e.what())); } catch(const std::bad_alloc &e) { bSuccessful = false; Verbose::out(1, "std::bad_alloc caught. " "The application has run out of memory. 
" "Message is: " + ToStr(e.what())); } catch(affymetrix_calvin_exceptions::FileNotFoundException &fnfe) { // rsatin fix for AFFY00023387: check file existence with message if not found bSuccessful = false; if( !bFileOpen1 ) Verbose::out(1, "File not found: '" + strFileName1 + "'"); else Verbose::out(1, "File not found: '" + strFileName2 + "'"); } catch(affymetrix_calvin_exceptions::UnableToOpenFileException &utoe) { // rsatin fix for AFFY00023387: check file existence with message if not found bSuccessful = false; if( !bFileOpen1 ) Verbose::out(1, "Unable to open file: '" + strFileName1 + "'"); else Verbose::out(1, "Unable to open file: '" + strFileName2 + "'"); } catch(affymetrix_calvin_exceptions::CalvinException &ce) { std::string strType = typeid(ce).name(); bSuccessful = false; Verbose::out(1, "affymetrix_calvin_exceptions::CalvinException caught. " \ " Affymetrix GeneChip Command Console library has thrown an exception of type " + strType + \ " Message is: " + affymetrix_calvin_utilities::StringUtils::ConvertWCSToMBS(ce.Description())); } catch(const std::exception &e) { bSuccessful = false; Verbose::out(1, "std::exception caught. " "Message is: " + ToStr(e.what())); } catch(...) { bSuccessful = false; Verbose::out(1, "Error in equivalency test. Unknown exception thrown."); } return bSuccessful; } /** * Load the Calvin header into memory. * @param pHeader - A pointer to the affymetrix_calvin_io::GenericDataHeader for the specified Calvin file. 
*/
// NOTE(review): the doc comment closed just above ("Load the Calvin header
// into memory", @param pHeader) does not describe this function: there is no
// pHeader argument and nothing is loaded.
//
// Compares two Calvin generic data headers.
//  - The file id, file type id and locale are wrapped in CalvinParameter
//    objects and compared with equivalentParameter(); a mismatch there only
//    clears bEquivalent.
//  - Every name/value parameter of Header1 whose name is not in setIgnore is
//    looked up by name in Header2.  A missing name, a differing parameter
//    type, or a value rejected by equivalentParameter() returns false
//    immediately.
//  - Parameters that exist only in Header2 are never examined, and a
//    differing parameter count only produces a warning.
//
// strPrompt is prepended to the messages written through Verbose::out().
// fFraction is forwarded for the file-level fields and for float parameters;
// all other parameter types are compared with a fraction of 0.0.
//
// NOTE(review): `std::set&` and `std::map&` have no template arguments in
// this copy of the file; they appear to have been stripped during extraction
// and must be restored from the upstream source.
bool Calvin::equivalentHeader(const AffxString& strPrompt, affymetrix_calvin_io::GenericDataHeader& Header1, affymetrix_calvin_io::GenericDataHeader& Header2, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, float fFraction)
{
  bool bEquivalent = true;
  AffxString strValue1;
  AffxString strValue2;

  //File header
  CalvinParameter paramCalvin1;
  CalvinParameter paramCalvin2;
  paramCalvin1.set("FileIdentifier", CalvinValue::AsciiType, Header1.GetFileId());
  paramCalvin2.set("FileIdentifier", CalvinValue::AsciiType, Header2.GetFileId());
  if (!equivalentParameter(strPrompt, paramCalvin1, paramCalvin2, setIgnore, mapEpsilon, fEpsilon, fFraction)) {bEquivalent = false;}
  paramCalvin1.set("FileTypeIdentifier", CalvinValue::AsciiType, Header1.GetFileTypeId());
  paramCalvin2.set("FileTypeIdentifier", CalvinValue::AsciiType, Header2.GetFileTypeId());
  if (!equivalentParameter(strPrompt, paramCalvin1, paramCalvin2, setIgnore, mapEpsilon, fEpsilon, fFraction)) {bEquivalent = false;}
  paramCalvin1.set("FileLocale", CalvinValue::TextType, StringUtils::ConvertWCSToMBS(Header1.GetLocale()));
  paramCalvin2.set("FileLocale", CalvinValue::TextType, StringUtils::ConvertWCSToMBS(Header2.GetLocale()));
  if (!equivalentParameter(strPrompt, paramCalvin1, paramCalvin2, setIgnore, mapEpsilon, fEpsilon, fFraction)) {bEquivalent = false;}

  // Header Parameters
  int iParamCount1 = Header1.GetNameValParamCnt();
  int iParamCount2 = Header2.GetNameValParamCnt();
  if (iParamCount1 != iParamCount2)
  {
    // A count mismatch is reported but deliberately not treated as a failure.
    Verbose::out(1, "WARNING: " + strPrompt + "Header does not have the same number of Parameters.");
    // bEquivalent = false;
  }
  affymetrix_calvin_parameter::ParameterNameValueType param1;
  affymetrix_calvin_parameter::ParameterNameValueType param2;
  for (int iParamIndex = 0; (iParamIndex < iParamCount1); iParamIndex++)
  {
    param1 = Header1.GetNameValParam(iParamIndex);

    //rsatin fix for AFFY00023387: ignore present/absent differences for fields in ignore list
    AffxString strParam1Name = StringUtils::ConvertWCSToMBS(param1.GetName());
    if (setIgnore.find(strParam1Name) != setIgnore.end())
    {
      continue;
    }

    bool bFound = false;
    // Linear search of Header2 for a parameter with the same name.
    for (int iParamIndex2 = 0; (iParamIndex2 < iParamCount2); iParamIndex2++)
    {
      param2 = Header2.GetNameValParam(iParamIndex2);
      if (param1.GetName() == param2.GetName())
      {
        bFound = true;
        if (param1.GetParameterType() != param2.GetParameterType())
        {
          Verbose::out(1, strPrompt + "Header Parameter Type mismatch for parameter: " + StringUtils::ConvertWCSToMBS(param1.GetName()));
          return false;
        }
        // Only float parameters are compared with the caller's fraction.
        float fFraction_param = 0.0;
        strValue1.clear();
        strValue2.clear();
        // Render both values as strings so they can be handed to equivalentParameter().
        // NOTE(review): the case labels are CalvinValue enumerators while the switch
        // value comes from ParameterNameValueType -- presumably the two enums share
        // the same numbering; confirm.
        switch (param1.GetParameterType())
        {
        case CalvinValue::Int8Type: strValue1 = ::getInt(param1.GetValueInt8()); strValue2 = ::getInt(param2.GetValueInt8()); break;
        case CalvinValue::UInt8Type: strValue1 = ::getInt(param1.GetValueUInt8()); strValue2 = ::getInt(param2.GetValueUInt8()); break;
        case CalvinValue::Int16Type: strValue1 = ::getInt(param1.GetValueInt16()); strValue2 = ::getInt(param2.GetValueInt16()); break;
        case CalvinValue::UInt16Type: strValue1 = ::getInt(param1.GetValueUInt16()); strValue2 = ::getInt(param2.GetValueUInt16()); break;
        case CalvinValue::Int32Type: strValue1 = ::getInt(param1.GetValueInt32()); strValue2 = ::getInt(param2.GetValueInt32()); break;
        case CalvinValue::UInt32Type: strValue1 = ::getUnsignedInt(param1.GetValueUInt32()); strValue2 = ::getUnsignedInt(param2.GetValueUInt32()); break;
        case CalvinValue::FloatType: fFraction_param = fFraction; strValue1 = ::getDouble(param1.GetValueFloat(), 10); strValue2 = ::getDouble(param2.GetValueFloat(), 10); break;
        case CalvinValue::AsciiType: strValue1 = param1.GetValueAscii(); strValue2 = param2.GetValueAscii(); break;
        case CalvinValue::TextType: strValue1 = StringUtils::ConvertWCSToMBS(param1.GetValueText()); strValue2 = StringUtils::ConvertWCSToMBS(param2.GetValueText()); break;
        default: break;
        }
        paramCalvin1.set(StringUtils::ConvertWCSToMBS(param1.GetName()), (CalvinValue::ParameterType)param1.GetParameterType(), strValue1);
        paramCalvin2.set(StringUtils::ConvertWCSToMBS(param2.GetName()), (CalvinValue::ParameterType)param2.GetParameterType(), strValue2);
        if (!equivalentParameter(strPrompt, paramCalvin1, paramCalvin2, setIgnore, mapEpsilon, fEpsilon, fFraction_param)) {return false;}
      }
    }
    if (!bFound)
    {
      Verbose::out(1, strPrompt + "Header Parameter name not found: " + StringUtils::ConvertWCSToMBS(param1.GetName()));
      return false;
    }
  }
  return bEquivalent;
}
affxparser/src/fusion/calvin_files/utils/src/Calvin.h0000644000175200017520000002405614516003651024011 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////
#ifndef _Calvin_H_
#define _Calvin_H_
/**
 * @file Calvin.h
 *
 * @brief This header contains the Calvin class definition.
*/ #include "calvin_files/data/src/DataGroupHeader.h" #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parsers/src/GenericFileReader.h" #include "calvin_files/utils/src/StringUtils.h" #include "calvin_files/writers/src/GenericFileWriter.h" //#include "chipstream/BioTypes.h" - not needed to build lib project #include "file/CHPFileData.h" #include "file/TsvFile/TsvFile.h" #include "util/AffxArray.h" #include "util/AffxString.h" // #include // class CalvinValue { public: /*! Enumerant of the built-in parameter types. */ enum ParameterType { Int8Type, /*! an 8 bit integer. */ UInt8Type, /*! an 8 bit unsigned integer. */ Int16Type, /*! a 16 bit integer. */ UInt16Type, /*! a 16 bit unsigned integer. */ Int32Type, /*! a 32 bit integer. */ UInt32Type, /*! a 32 bit unsigned integer. */ FloatType, /*! a 32 bit floating point. */ TextType, /*! a 16 bit character. */ AsciiType, /*! an 8 bit character. */ UnknownType /*! an 8 bit integer. */ }; protected: ParameterType m_eParameterType; AffxString m_strValue; public: CalvinValue() {clear();} CalvinValue(ParameterType eParameterType, const AffxString& strValue) { m_eParameterType = eParameterType; m_strValue = strValue; } ParameterType getParameterType() {return m_eParameterType;} AffxString& getValue() {return m_strValue;} void clear() {m_eParameterType = UnknownType; m_strValue.clear();} /** * Compare function. * @param that - A reference to an instance of this class. * @param iCompareCode - The code to switch on when doing compares. (Each code is a different compare.) * @return - int value. 
(-1 if *this < that, 0 if *this == that, 1 if *this > that) */ int compareTo(CalvinValue& that, int iCompareCode) { int iCompareResult = 0; switch (iCompareCode) { case 0: switch (m_eParameterType) { case TextType: // iCompareResult = StringUtils::ConvertWCSToMBS(m_strValue).compareTo(StringUtils::ConvertWCSToMBS(that.m_strValue), iCompareCode); case AsciiType: iCompareResult = m_strValue.compareTo(that.m_strValue, iCompareCode); case FloatType: iCompareResult = AffxArray::compare((float)::getDouble(m_strValue), (float)::getDouble(that.m_strValue)); case UInt32Type: iCompareResult = AffxArray::compare(::getUnsignedInt(m_strValue), ::getUnsignedInt(that.m_strValue)); default: iCompareResult = AffxArray::compare(::getInt(m_strValue), ::getInt(that.m_strValue)); } break; } return iCompareResult; } }; class CalvinParameter : public CalvinValue { protected: AffxString m_strName; public: CalvinParameter() { } CalvinParameter(const AffxString& strName, CalvinValue::ParameterType eParameterType, const AffxString& strValue) : CalvinValue(eParameterType, strValue) { m_strName = strName; } void set(const AffxString& strName, CalvinValue::ParameterType eParameterType, const AffxString& strValue) { m_strName = strName; m_eParameterType = eParameterType; m_strValue = strValue; } AffxString& getName() {return m_strName;} void clear() {m_strName.clear(); CalvinValue::clear();} /** * Compare function. * @param that - A reference to an instance of this class. * @param iCompareCode - The code to switch on when doing compares. (Each code is a different compare.) * @return - int value. 
(-1 if *this < that, 0 if *this == that, 1 if *this > that)
   *
   * Only compare code 0 is handled, and it orders parameters by name alone
   * (the value comparison is commented out).  Any other code returns 0.
   */
  int compareTo(CalvinParameter& that, int iCompareCode)
  {
    int iCompareResult = 0;
    switch (iCompareCode)
    {
    case 0:
      iCompareResult = m_strName.compareTo(that.m_strName, 0);
      // if (iCompareResult == 0) {iCompareResult = this->CalvinValue::compareTo(that, 0);}
      break;
    }
    return iCompareResult;
  }

  // Generic ordering predicate: the primary template aborts through
  // Err::errAbort() when called, so only explicitly specialised compare codes
  // are usable.
  // NOTE(review): the template parameter list (the code references `k`) is
  // missing from this copy of the file -- it appears to have been stripped
  // during extraction; restore it from the upstream source.
  template struct ComparePred {
    bool operator()(const CalvinParameter* lhs, const CalvinParameter* rhs) const {
      Err::errAbort("CalvinParameter: ComparePred instantiated with an invalid compare code = " + ToStr(k));
      return false;
    }
  };
};

// Compare code 0: strict "less than" on the parameter name.
template<> struct CalvinParameter::ComparePred<0> {
  bool operator()(const CalvinParameter* lhs, const CalvinParameter* rhs) const {
    return lhs->m_strName.compareTo(rhs->m_strName, 0) < 0;
  }
};

/**
 * @brief  A class for storing Calvin header data.
 *
 * clear(), also called from the destructor, forwards to the inherited
 * deleteAll().
 * NOTE(review): the element type of the AffxArray base is missing here and in
 * the AffxArray members of CalvinDataSet below (stripped template arguments).
 */
class CalvinHeader : public AffxArray
{
public:
  CalvinHeader() { }
  virtual ~CalvinHeader() { clear(); }
  void clear() { deleteAll(); }
};

// A named data set holding column headers and a two-level array of values.
class CalvinDataSet
{
protected:
  AffxString m_strName;
  AffxArray m_vColumnHeaders;
  AffxArray > m_mxCalvinValues;

public:
  CalvinDataSet() { }
  virtual ~CalvinDataSet() { clear(); }

  // Calls deleteAll() on the column headers, on every inner array of the
  // value matrix, and finally on the matrix itself.
  void clear()
  {
    m_vColumnHeaders.deleteAll();
    for (int iIndex = 0; (iIndex < m_mxCalvinValues.getCount()); iIndex++)
    {
      m_mxCalvinValues.getAt(iIndex)->deleteAll();
    }
    m_mxCalvinValues.deleteAll();
  }

  void setName(const AffxString& str) {m_strName = str;}
  AffxString& getName() {return m_strName;}
  AffxArray& getColumnHeaders() {return m_vColumnHeaders;}
  AffxArray >& getCalvinValues() {return m_mxCalvinValues;}

  /**
   * Compare function.
   * @param that - A reference to an instance of this class.
   * @param iCompareCode - The code to switch on when doing compares. (Each code is a different compare.)
   * @return - int value.
(-1 if *this < that, 0 if *this == that, 1 if *this > that)
   *
   * Only compare code 0 is handled: data sets are ordered by name.  Any other
   * code returns 0.
   */
  int compareTo(CalvinDataSet& that, int iCompareCode)
  {
    int iCompareResult = 0;
    switch (iCompareCode)
    {
    case 0:
      iCompareResult = m_strName.compareTo(that.m_strName, 0);
      break;
    }
    return iCompareResult;
  }
};

// A named group of data sets.
// NOTE(review): the AffxArray element types in this class and in Calvin
// below are missing from this copy of the file (stripped template arguments).
class CalvinDataGroup
{
protected:
  AffxString m_strName;
  AffxArray m_vCalvinDataSets;

public:
  CalvinDataGroup() { }
  virtual ~CalvinDataGroup() { clear(); }
  void clear() {m_vCalvinDataSets.deleteAll();}
  void setName(const AffxString& str) {m_strName = str;}
  AffxString& getName() {return m_strName;}
  AffxArray& getCalvinDataSets() {return m_vCalvinDataSets;}

  /**
   * Compare function.
   * @param that - A reference to an instance of this class.
   * @param iCompareCode - The code to switch on when doing compares. (Each code is a different compare.)
   *                       Only code 0 (order by name) is handled; other codes return 0.
   * @return - int value. (-1 if *this < that, 0 if *this == that, 1 if *this > that)
   */
  int compareTo(CalvinDataGroup& that, int iCompareCode)
  {
    int iCompareResult = 0;
    switch (iCompareCode)
    {
    case 0:
      iCompareResult = m_strName.compareTo(that.m_strName, 0);
      break;
    }
    return iCompareResult;
  }
};

// In-memory view of one Calvin file: file name, file header, parent headers
// and data groups, plus the equivalence tests between two such files.
class Calvin
{
protected:
  AffxString m_strFileName;
  CalvinHeader m_objCalvinHeader;
  AffxArray m_vParentCalvinHeaders;
  AffxArray m_vCalvinDataGroups;

public:
  static void test();

  Calvin() { }
  virtual ~Calvin() { clear(); }

  // Clears the header and calls deleteAll() on the parent headers and data groups.
  void clear() {m_objCalvinHeader.clear(); m_vParentCalvinHeaders.deleteAll(); m_vCalvinDataGroups.deleteAll();}

  AffxString& getFileName() {return m_strFileName;}
  CalvinHeader& getCalvinHeader() {return m_objCalvinHeader;}
  AffxArray& getParentCalvinHeaders() {return m_vParentCalvinHeaders;}
  AffxArray& getCalvinDataGroups() {return m_vCalvinDataGroups;}

  bool load(const AffxString& strFileName);

  // NOTE(review): `std::set&` / `std::map&` in the declarations below have no
  // template arguments in this copy of the file; restore them from upstream.
  bool equivalent(Calvin& that, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon = 0.0001, bool bCheckHeader = true, float fFraction = 0.0);
  static bool equivalent( const AffxString& strFileName1, const AffxString& strFileName2, std::set& setIgnore, std::set& setSetIgnore, std::map& mapEpsilon, float fEpsilon = 0.0001, double dCorrelationCutoff = 1.0, bool bCheckHeader = true, int iMessageLimit = 1000, float fFraction=0.0);
  static bool equivalent(const AffxString& strFileName1, const AffxString& strFileName2, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon = 0.0001, double dCorrelationCutoff = 1.0, bool bCheckHeader = true, int iMessageLimit = 1000, float fFraction = 0.0);

protected:
  void loadHeader(affymetrix_calvin_io::GenericDataHeader& Header, CalvinHeader& header);
  bool equivalentHeader(const AffxString& strPrompt, CalvinHeader& headerThis, CalvinHeader& headerThat, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, float fFraction = 0.0);
  static bool equivalentParameter(const AffxString& strPrompt, CalvinParameter& This, CalvinParameter& That, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, float fFraction = 0.0);
  static bool equivalentHeader(const AffxString& strPrompt, affymetrix_calvin_io::GenericDataHeader& Header1, affymetrix_calvin_io::GenericDataHeader& Header2, std::set& setIgnore, std::map& mapEpsilon, float fEpsilon, float fFraction = 0.0);
};

#endif
affxparser/src/fusion/calvin_files/utils/src/Coords.h0000644000175200017520000001463014516003651024023 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _Coords_HEADER_ #define _Coords_HEADER_ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include // /*! \file Coords.h This file defines array coordinate types. */ namespace affymetrix_calvin_utilities { /*! Defines a convention for a rectangle mapped onto a region */ enum RectanglePositions{ UpperLeft = 0, /*! The upper left corner. */ UpperRight, /*! The upper right corner. */ LowerRight, /*! The lower right corner. */ LowerLeft /*! The lower left corner. */ }; /*! Defines a floating-point location */ struct FPoint { /*! The x coordinate */ float x; /*! The y coordinate */ float y; /*! Equality test */ bool operator==(const FPoint& lhs)const { return (x==lhs.x && y==lhs.y); } /*! Inequality test */ bool operator!=(const FPoint& lhs)const { return !(*this==lhs); } }; typedef std::vector FPointVector; /*! Defines a region with floating point coordinates */ struct FRegion { /*! Clears the region. */ void Clear(){ pts.clear(); } /*! A vector of points */ FPointVector pts; /*! Equality test */ bool operator==(const FRegion& lhs) const { if(lhs.pts.size() == pts.size()) { size_t sz = pts.size(); for(size_t i = 0; i < sz; i++) { if(lhs.pts[i] == pts[i]) continue; return false; } return true; } return false; } bool operator!=(const FRegion& lhs)const { return !(*this==lhs); } }; typedef std::vector FRegionVector; /*! Defines a floating point grid coords */ struct FGridCoords { /* Empty constructor */ FGridCoords() { upperleft.x = 0.0f, upperleft.y = 0.0f; upperright = lowerright = lowerleft = upperleft; } /*! 
Cast constructor from FRegion */ FGridCoords(const FRegion& r) { upperleft.x = 0.0f, upperleft.y = 0.0f; upperright = lowerright = lowerleft = upperleft; if (r.pts.size()>=4) { upperleft = r.pts.at(UpperLeft); upperright = r.pts.at(UpperRight); lowerright = r.pts.at(LowerRight); lowerleft = r.pts.at(LowerLeft); } } /*! Cast operator to FRegion */ operator FRegion() { FRegion r; r.pts.resize(4); r.pts[UpperLeft] = upperleft; r.pts[UpperRight] = upperright; r.pts[LowerRight] = lowerright; r.pts[LowerLeft] = lowerleft; return r; } /*! Tests if the rectangle is empty */ bool IsEmpty() const { if (upperleft == upperright && lowerleft == lowerright && upperleft == lowerleft) return true; return false; } /*! The upper left coordinate */ FPoint upperleft; /*! The upper right coordinate */ FPoint upperright; /*! The lower right coordinate */ FPoint lowerright; /*! The lower left coordinate */ FPoint lowerleft; }; /*! Defines an integral location */ struct Point { /*! The X coordinate */ int32_t x; /*! The Y coodinate */ int32_t y; /*! Equality test */ bool operator==(const Point& lhs)const { return (x==lhs.x && y==lhs.y); } /*! Inequality test */ bool operator!=(const Point& lhs)const { return !(*this==lhs); } }; /*! Defines an integral location */ struct PointU16_t { /*! The X coordinate */ u_int16_t x; /*! The Y coodinate */ u_int16_t y; }; /*! Defines an integral region */ struct Region { /*! Clears the region. */ void Clear(){ pts.clear(); } /*! A vector of points. */ std::vector pts; }; /*! Defines an integral rectagle */ struct GridCoords { /*! Empty constructor */ GridCoords() { upperleft.x = 0; upperleft.y = 0; upperright = lowerright = lowerleft = upperleft; } /*! 
Cast constructor from a Region */ GridCoords(const Region& r) { upperleft.x = 0; upperleft.y = 0; upperright = lowerright = lowerleft = upperleft; if (r.pts.size()>=4) { upperleft = r.pts.at(UpperLeft); upperright = r.pts.at(UpperRight); lowerright = r.pts.at(LowerRight); lowerleft = r.pts.at(LowerLeft); } } /*! Cast operator to Region */ operator Region() { Region r; r.pts.resize(4); r.pts[UpperLeft] = upperleft; r.pts[UpperRight] = upperright; r.pts[LowerRight] = lowerright; r.pts[LowerLeft] = lowerleft; return r; } /*! Tests if the rectangle is empty */ bool IsEmpty() const { if (upperleft == upperright && lowerleft == lowerright && upperleft == lowerleft) return true; return false; } /*! The upper left cooridnate */ Point upperleft; /*! The upper right coordinate */ Point upperright; /*! The lower right coordinate */ Point lowerright; /*! The lower left coordinate */ Point lowerleft; }; /*! An STL vector or floating point regions. */ typedef std::vector FRegionVector; /*! An STL vector of regions. */ typedef std::vector RegionVector; /*! This is a class for holding x-y coordinates.*/ class XYCoord { public: /*! Constructor */ XYCoord() { xCoord = 0; yCoord = 0; } /*! Constructor */ XYCoord(int16_t x, int16_t y) { xCoord = x; yCoord = y; } /*! Destructor */ ~XYCoord() {} /*! x-coordinate */ int16_t xCoord; /*! y-coordinate */ int16_t yCoord; /*! Assignment operator */ XYCoord operator=(const XYCoord &p) { xCoord = p.xCoord; yCoord = p.yCoord; return *this; } /*! equality operator */ bool operator==(const XYCoord &p) const { return (xCoord == p.xCoord && yCoord == p.yCoord); } /*! inequality operator */ bool operator!=(const XYCoord &p) const { return (xCoord != p.xCoord || yCoord != p.yCoord); } /*! less than operator */ bool operator<(const XYCoord& p) const { return (yCoord < p.yCoord ? true : ((yCoord == p.yCoord && xCoord < p.xCoord) ? true : false)); } }; /*! vector of XYCoord */ typedef std::vector XYCoordVector; /*! 
iterator of XYCoord (the element type of the vector is missing in this
 *  copy of the file -- stripped template argument, as for the typedef below) */
typedef std::vector::iterator XYCoordIt;

/*! constant iterator of XYCoord */
typedef std::vector::const_iterator XYCoordConstIt;

}

#endif
affxparser/src/fusion/calvin_files/utils/src/DateTime.cpp0000644000175200017520000001443614516003651024625 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#include "calvin_files/utils/src/DateTime.h"
//
#include "calvin_files/exception/src/InterpretationException.h"
#include "calvin_files/utils/src/StringUtils.h"
// NOTE(review): the header name of the include below appears to have been
// stripped from this copy of the file; restore it from the upstream source.
// #include //

using namespace affymetrix_calvin_utilities;
using namespace affymetrix_calvin_exceptions;

// printf-style patterns used by FormatDate() / FormatTime(); every field is
// zero padded to at least two digits.
#define DATE_PRINTF_FORMAT std::wstring(L"%02d-%02d-%02d")
#define TIME_PRINTF_FORMAT std::wstring(L"%02d:%02d:%02d")

#ifdef _MSC_VER
#pragma warning(disable: 4996) // Turn off warnings for deprecated VC++ functions.
#endif

/*
 * Initialize the class: empty date and time, flagged as UTC (see Clear()).
 */
DateTime::DateTime()
{
  Clear();
}

/*
 * Clean up the class.
 */
DateTime::~DateTime()
{
}

/*
 * Erase the date and time values and mark the object as UTC.
 */
void DateTime::Clear()
{
  coordinateduniversaltime = true;
  affydate = L"";
  affytime = L"";
}

/*! The date.
 *
 * @return	The date.
 */
std::wstring DateTime::Date() const
{
  return affydate;
}

/*!
Set the date. * */ void DateTime::Date(std::wstring value) { StringUtils::STLTrimRight(value); StringUtils::STLTrimLeft(value); affydate = value; } /*! The time. * * @return The time. */ std::wstring DateTime::Time() const { return affytime; } /*! Set the time. * */ void DateTime::Time(std::wstring value) { StringUtils::STLTrimRight(value); StringUtils::STLTrimLeft(value); affytime = value; } /* * Get the current date and time. * * @return DateTime */ DateTime DateTime::GetCurrentDateTime() { DateTime currentDateTime; size_t stringLength = 0; const size_t MAX_DATETIME_BUFFER_SIZE = 100; wchar_t strDest[MAX_DATETIME_BUFFER_SIZE]; // Get the current local time. time_t long_time = 0; time( &long_time ); struct tm* newtime = gmtime( &long_time ); // Conver the date to a string. stringLength = wcsftime(strDest, MAX_DATETIME_BUFFER_SIZE, DATEFORMAT.c_str(), newtime); wcsftime(strDest, stringLength+1, DATEFORMAT.c_str(), newtime); currentDateTime.Date(strDest); // Convert the time to a string. stringLength = wcsftime(strDest, MAX_DATETIME_BUFFER_SIZE, TIMEFORMAT.c_str(), newtime); wcsftime(strDest, stringLength+1, TIMEFORMAT.c_str(), newtime); currentDateTime.Time(strDest); return currentDateTime; } /*! Gets whether or not the DateTime object time value is UTC or not. * * @return UTC or not. */ bool DateTime::IsUTC() { return coordinateduniversaltime; } std::wstring DateTime::ToString() { if(affydate == L"" || affytime == L"") { DateTime dt = GetCurrentDateTime(); this->Date(dt.Date()); this->Time(dt.Time()); } std::wstring datetime = this->Date() + DATETIME_SEPERATOR_FORMAT_SPECIFIER + this->Time(); if (coordinateduniversaltime) datetime += ZULU_FORMAT_SPECIFIER; return datetime; } /* * Converts the string into a DateTime. 
*/ DateTime DateTime::Parse(std::wstring value) { DateTime result; int32_t posT = (int32_t) value.find(DATETIME_SEPERATOR_FORMAT_SPECIFIER); if (posT == std::wstring::npos) { FormatException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } std::wstring date = value.substr(0, posT); CheckDateFormat(date); bool utc = false; int32_t posZ = (int32_t) value.find(ZULU_FORMAT_SPECIFIER); if (posZ != std::wstring::npos) { if (posZ < posT) { FormatException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } utc = true; value = value.substr(0,posZ); // remove the ZULU specifier } std::wstring time = value.substr(posT+1); // Check the time format CheckTimeFormat(time); // Looks good result.Time(time); result.Date(date); result.coordinateduniversaltime = utc; return result; } /* * Get a properly formatted Date string. */ std::wstring DateTime::FormatDate(u_int32_t year, u_int32_t month, u_int32_t day) { wchar_t buf[50]; FormatString3(buf, 50, DATE_PRINTF_FORMAT.c_str(), year, month, day); return buf; } /* * Get a properly formatted Time string. */ std::wstring DateTime::FormatTime(u_int32_t hour, u_int32_t minute, u_int32_t second) { wchar_t buf[50]; FormatString3(buf, 50, TIME_PRINTF_FORMAT.c_str(), hour, minute, second); return buf; } /* * Get a properly formatted date-time string. */ std::wstring DateTime::FormatDateTime(u_int32_t year, u_int32_t month, u_int32_t day, u_int32_t hour, u_int32_t minute, u_int32_t second, bool utc) { std::wstring datetime = FormatDate(year, month, day) + DATETIME_SEPERATOR_FORMAT_SPECIFIER + FormatTime(hour, minute, second); if (utc) datetime += ZULU_FORMAT_SPECIFIER; return datetime; } /* * Check the date format and modify it if needed and possible. 
*/ void DateTime::CheckDateFormat(std::wstring& date) { // Check the format int32_t y, m, d; if (swscanf(date.c_str(), L"%d-%d-%d", &y, &m, &d) != 3) { if (swscanf(date.c_str(), L"%d/%d/%d", &y, &m, &d) == 3 && date.length() < 50) { date = FormatDate(y, m, d); } else { FormatException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } } /* * Check the time format. */ void DateTime::CheckTimeFormat(std::wstring& time) { int32_t h, M, s; if (swscanf(time.c_str(), L"%d:%d:%d", &h, &M, &s) != 3) { FormatException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } affxparser/src/fusion/calvin_files/utils/src/DateTime.h0000644000175200017520000001110114516003651024254 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DateTime_HEADER_ #define _DateTime_HEADER_ /*! \file DateTime.h This file provides data structures for holding a date and time. 
*/

#include "calvin_files/portability/src/AffymetrixBaseTypes.h"
// NOTE(review): the header names of the two includes below appear to have
// been stripped from this copy of the file; restore them from upstream.
// #include #include //

namespace affymetrix_calvin_utilities
{

/*! The zulu format. */
#define ZULU_FORMAT_SPECIFIER std::wstring(L"Z")

/*! The datetime separator format specifier.*/
#define DATETIME_SEPERATOR_FORMAT_SPECIFIER std::wstring(L"T")

/*! The date format string. */
#define DATEFORMAT std::wstring(L"%Y-%m-%d") // YYYY-MM-DD

/*! The time format string. */
#define TIMEFORMAT std::wstring(L"%H:%M:%S") // hh:mm:ss

/*! This class provides storage for a date and time object. */
class DateTime
{
public:

  /*! Constructor */
  DateTime();

  /*! Destructor */
  ~DateTime();

protected:
  /*! The date */
  std::wstring affydate;

  /*! The time */
  std::wstring affytime;

  /*! indicates if the time value is UTC */
  bool coordinateduniversaltime;

public:
  /*! The date.
   *
   * @return	The date.
   */
  std::wstring Date() const;

  /*! Set the date.
   *
   * @param value	The new date.
   */
  void Date(std::wstring value);

  /*! The time.
   *
   * @return	The time.
   */
  std::wstring Time() const;

  /*! Set the time.
   *
   * @param value	The new time.
   */
  void Time(std::wstring value);

  /*! Clears the members. */
  void Clear();

  /*! Gets the current date and time.
   *
   * @return	The current date and time.
   */
  static DateTime GetCurrentDateTime();

  /*! Gets whether or not the DateTime object time value is UTC or not.
   *
   * @return	UTC or not.
   */
  bool IsUTC();

  /*! Sets if the DateTime is UTC or not.
   *	@param value True if the DateTime should be considered UTC.
   */
  void UTC(bool value) { coordinateduniversaltime = value; }

  /*! Concatenates the date and the time into one string.
   *
   * @return	The string representation of the date and time.
   */
  std::wstring ToString();

  /*! Converts the string into a DateTime.
   *	@param value A wstring containing the date and time to convert.  The format must match that returned by ToString().
   *	@return A DateTime equivalent to the date and time in value.
   *	@exception FormatException
   */
  static DateTime Parse(std::wstring value);

  /*! Get a properly formatted Date string.
   *	@param year A four digit year.
   *	@param month A one-based month of the year.
   *	@param day A one-based day of month
   *	@return A properly formatted date string.
   */
  static std::wstring FormatDate(u_int32_t year, u_int32_t month, u_int32_t day);

  /*! Get a properly formatted Time string.
   *	@param hour Zero-based hour of day.
   *	@param minute Zero-based minute of hour.
   *	@param second Zero-based second of minute.
   *	@return A properly formatted time string.
   */
  static std::wstring FormatTime(u_int32_t hour, u_int32_t minute, u_int32_t second);

  /*! Get a properly formatted date-time string.
   *	@param year A four digit year.
   *	@param month A one-based month of the year.
   *	@param day A one-based day of month
   *	@param hour Zero-based hour of day.
   *	@param minute Zero-based minute of hour.
   *	@param second Zero-based second of minute.
   *	@param utc Indicates if the time is Universal Coordinated Time.
   *	@return A properly formatted date-time string.
   */
  static std::wstring FormatDateTime(u_int32_t year, u_int32_t month, u_int32_t day, u_int32_t hour, u_int32_t minute, u_int32_t second, bool utc);

protected:
  /*! Check the date format and may modify it.
   *	@param date The formatted date string.
   *	@exception FormatException
   */
  static void CheckDateFormat(std::wstring& date);

  /*! Check the time format.
   *	@param time The formatted time string.
   *	@exception FormatException
   */
  static void CheckTimeFormat(std::wstring& time);
};

}

#endif // _DateTime_HEADER_
affxparser/src/fusion/calvin_files/utils/src/FileUtils.cpp0000644000175200017520000002474714516003651025037 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _WIN32 #include "windows.h" #endif #include "calvin_files/utils/src/FileUtils.h" // #include "util/Fs.h" // #include #include #include #include // #ifdef _WIN32 #include #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #else #include #include #endif #ifdef WIN32 #include #include #define BUFFSIZE 10000 #endif #define POSIX_OPEN open #define POSIX_CLOSE close using namespace affymetrix_calvin_utilities; using namespace std; static string LockFileExtension = ".lock"; /** Little template function to make string conversion easy. this isn't the fastest way to do things, but it is easy. */ template std::string FUToStr(const T &t) { std::ostringstream s; s.setf(std::ios_base::boolalpha); s << t; if(s.str() == "-1.#INF") return(FUToStr("-inf")); else if(s.str() == "1.#INF") return(FUToStr("inf")); else if(s.str() == "-1.#IND") return(FUToStr("nan")); else if(s.str() == "1.#IND") return(FUToStr("nan")); else if(s.str() == "-Inf") return(FUToStr("-inf")); else if(s.str() == "Inf") return(FUToStr("inf")); return s.str(); }; ///@todo copy of the code from Util::subChar. Ugly Hack to prevent APT dependencies static void subChar(std::string &s, char from, char to) { std::string::size_type pos = 0; for(pos = 0; pos < s.size(); pos++) { if(s[pos] == from) { s[pos] = to; } } } ///@todo copy of the code from Util::convertPathName. 
Ugly Hack to prevent APT dependencies /* Private method to this file used to convert a relative path into an absolute path, and if successful to add "\\?\" prefix. This prevents the system APIs for failing on long filenames/paths by forcing them to pass the filename directly down into the low level file system APIs. */ #if defined (WIN32) std::string _getFullPathWindowsLocal(const std::string &in) { DWORD retval=1; wchar_t *wbuffer = new wchar_t[BUFFSIZE]; // Wide Char buffer for input path wchar_t *buffer = new wchar_t[BUFFSIZE]; // Wide Char buffer for output absolute path char *abuffer = new char[BUFFSIZE]; // Asci Char buffer for output absolute path unsigned int strLen = 0; // Setup wbuffer with our input const char *inPtr = in.c_str(); strLen = strlen(inPtr); if(strLen > BUFFSIZE - 1) { std::string se = "Cannot handle string longer than " + FUToStr(BUFFSIZE) + ": '" + in + "' is " + FUToStr(strLen); // PATCH: std::exception(const char*) does not have to exist according // to standards. Using subclass runtime_error instead. /HB throw new std::runtime_error(se.c_str()); } mbstowcs(wbuffer,inPtr, strLen + 1); // Compute the absolute path. We use the wide char // version which can handle > MAX_PATH retval = GetFullPathNameW(wbuffer,BUFFSIZE - 1,buffer,NULL); // If no characters were converted, then simply return the original string if (retval < 1) { delete [] wbuffer; delete [] buffer; delete [] abuffer; return in; } // If we will overflow our buffer, then abort. if(retval > BUFFSIZE-1) { // PATCH: std::exception(const char*) does not have to exist according // to standards. Using subclass runtime_error instead. /HB throw new std::runtime_error("Unexpected failure. 
Converted more characters than expected"); } // Convert the absolute path to asci char from wide char wcstombs(abuffer, (wchar_t *)buffer, retval + 1); // Free up memory and return our string string rs; rs = abuffer; delete [] wbuffer; delete [] buffer; delete [] abuffer; if(rs == "") { // If our result is empty, return input string return in; } else { // Otherwise we have an absolute path, so add the "\\?\" magic if(rs == "") { return in; } else { if(rs.substr(0,2) == "\\\\") return rs; else return "\\\\?\\" + rs; } } } #endif ///@todo copy of the code from Util::convertPathName. Ugly Hack to prevent APT dependencies std::string convertPathNameLocal(const std::string &path, bool singleFile) { std::string s = path; if(s.find(':') != std::string::npos) return s; #if defined (WIN32) // Convert forward slash to back slash -- 92 is the ascii code for '\' subChar(s, '/', 92); // If we are given a single file name, then try and cope with extra long names // by using '\\?\C:\...\' magic if(singleFile) { // First we split into a path and filename parts size_t pos=s.rfind("\\"); string pathPart, filePart; if(pos != std::string::npos) { pathPart = s.substr(0,pos); filePart = s.substr(pos+1); } else { // No path part found, so assume CWD pathPart = "."; filePart = s; } //Verbose::out(1,"Path Part = '" + pathPart + "'"); //Verbose::out(1,"File Part = '" + filePart + "'"); // Now we call this method to convert the minimal path part to // an absolute path. _getFullPathWindows() will add the // "\\?\" magic if appropriate. pathPart = _getFullPathWindowsLocal(pathPart); s = pathPart + '\\' + filePart; } #else // Unix deals with long filenames/paths as one would expect. // So all we need to do is flip back slashes to forward slashes. // 92 is the ascii code for '\' subChar(s, 92, '/'); #endif //Verbose::out(1, "Converted '" + path + "' to '" + s + "'"); return s; } /* * Check the existance of the file. 
*/ bool FileUtils::Exists(const char *fileName) { ///@todo this is an ugly hack to prevent APT dependencies. /// The following is a copy from Util::fileReadable() int f; f = Fs::aptOpen(fileName, O_RDONLY); if(f < 0) { f = Fs::aptOpen(convertPathNameLocal(fileName,true).c_str(), O_RDONLY); if(f < 0) { return false; } } POSIX_CLOSE(f); return true; } /* * Delete the input file. */ bool FileUtils::RemoveFile(const char *fileName) { return (remove(fileName) == 0); } /* * Check if either the file or the lock file exists. * Return false the file does not exist of if the lock file does exist. * Otherwise create the lock file and return the status of the creation. */ bool FileUtils::LockFile(const char *fileName) { if (FileUtils::Exists(fileName) == false) return false; string lockFile = fileName + LockFileExtension; if (FileUtils::Exists(lockFile.c_str()) == true) return false; ofstream fileStream; Fs::aptOpen(fileStream, lockFile, ios::out); bool isOpen = fileStream.is_open(); fileStream.close(); return isOpen; } /* * If the lock file does not exist then return a false. * Otherwise remove the lock file and return the status. 
*/ bool FileUtils::UnlockFile(const char *fileName) { string lockFile = fileName + LockFileExtension; if (FileUtils::Exists(lockFile.c_str()) == false) return true; return (remove(lockFile.c_str()) == 0); } list FileUtils::ListFiles(const char *pathName, const char *ext) { list files; string basePath = pathName; if (basePath.length() > 0) { if (basePath[basePath.length()-1] != '\\' && basePath[basePath.length()-1] != '/') { basePath += "/"; } } string exten = ext; #ifdef _MSC_VER WIN32_FIND_DATA findData; string search = basePath + "*."; if (exten.length() == 0) search += "*"; else search += exten; HANDLE hHandle = FindFirstFile(search.c_str(), &findData); BOOL bFound = (hHandle != INVALID_HANDLE_VALUE); while (bFound) { if (!(findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { string file = basePath + findData.cFileName; files.push_back(file); } // Find the next file. bFound = ::FindNextFile(hHandle, &findData); } ::FindClose(hHandle); #else DIR *dirp = opendir(pathName); struct dirent *dp; bool cont = (dirp != NULL); while (cont) { dp = readdir(dirp); if (dp) { string file = basePath + dp->d_name; if (Fs::fileExists(file)) { if (exten.length() == 0) { files.push_back(file); } else { if (file.rfind(ext) == file.length()-exten.length()) { files.push_back(file); } } } } cont = (dp != NULL && dirp != NULL); } if (dirp) closedir(dirp); #endif return files; } /* Deletes a file. */ void FileUtils::RemovePath(const char *path) { list files; string basePath = path; if (basePath.length() > 0) { if (basePath[basePath.length()-1] != '\\' && basePath[basePath.length()-1] != '/') { basePath += "/"; } } #ifdef _MSC_VER WIN32_FIND_DATA findData; string search = basePath + "*.*"; HANDLE hHandle = FindFirstFile(search.c_str(), &findData); BOOL bFound = (hHandle != INVALID_HANDLE_VALUE); while (bFound) { string fileName = findData.cFileName; if (fileName != "." 
&& fileName != "..") { string filePath = basePath + findData.cFileName; if (!(findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { FileUtils::RemoveFile(filePath.c_str()); } else { FileUtils::RemovePath(filePath.c_str()); } } // Find the next file. bFound = ::FindNextFile(hHandle, &findData); } ::FindClose(hHandle); _rmdir(path); #else DIR *dirp = opendir(path); struct dirent *dp; bool cont = (dirp != NULL); while (cont) { dp = readdir(dirp); if (dp) { string fileName = dp->d_name; if (fileName != "." && fileName != "..") { string filePath = basePath + dp->d_name; if (Fs::fileExists(filePath)) { FileUtils::RemoveFile(filePath.c_str()); } else { FileUtils::RemovePath(filePath.c_str()); } } } cont = (dp != NULL && dirp != NULL); } if (dirp) closedir(dirp); rmdir(path); #endif } affxparser/src/fusion/calvin_files/utils/src/FileUtils.h0000644000175200017520000000520614516003651024471 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FileUtils_HEADER_ #define _FileUtils_HEADER_ /*! \file FileUtils.h This file provides file utilities. */ #include #include #include #include // namespace affymetrix_calvin_utilities { /*! 
This class provides utility functions for files. Every member is static. */
class FileUtils
{
public:
  /*! Checks for the existence of a file.
   * The implementation probes the file by opening it read-only, so a file
   * that cannot be opened for reading is reported as not existing.
   *
   * @param fileName The name of the file to test for existence.
   * @return True if the file exists (could be opened for reading).
   */
  static bool Exists(const char *fileName);

  /*! Locks a file for exclusive use.
   * This is a soft locking mechanism. The actual file is not locked, instead
   * a companion lock file (fileName with ".lock" appended) is created to
   * mark the lock status.
   *
   * @param fileName The name of the file to lock.
   * @return True if the file was successfully locked. False if the file does
   *         not exist, is already locked, or the lock file could not be created.
   */
  static bool LockFile(const char *fileName);

  /*! Unlocks a file for exclusive use.
   * This is a soft locking mechanism. The actual file is not locked, instead
   * a companion lock file (fileName with ".lock" appended) marks the lock
   * status.
   *
   * @param fileName The name of the file to unlock.
   * @return True if no lock file exists or the lock file was removed.
   */
  static bool UnlockFile(const char *fileName);

  /*! Retrieves the list of files in a path that match the extension.
   * Sub-directories are not searched.
   *
   * @param pathName The path to search.
   * @param ext The file extension; an empty string matches every file.
   * @return The list of files (full path) found in the directory.
   */
  static std::list ListFiles(const char *pathName, const char *ext);

  /*! Deletes a file.
   * @param fileName The name of the file to delete.
   * @return True if the file was deleted.
   */
  static bool RemoveFile(const char *fileName);

  /*! Deletes a directory together with everything it contains.
   * No status is returned; failures to delete individual entries are ignored.
   * @param path The name of the path to delete.
   */
  static void RemovePath(const char *path);
};

};

#endif // _FileUtils_HEADER_
affxparser/src/fusion/calvin_files/utils/src/GenoCallCoder.cpp0000644000175200017520000011326014516003651025565 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file GenoCallCoder.cpp * @author Ray Wheeler * @date Tue May 27 13:31:31 PDT 2008 * * @brief Genotype call en/decoder */ // #include "calvin_files/utils/src/GenoCallCoder.h" // #include "file/TsvFile/TsvFile.h" #include "util/Convert.h" #include "util/Err.h" #include "util/Verbose.h" // #include #include #include #include #include #include // using namespace std; using namespace affx; const string GenoCallCoder::m_annotation_csv_delimiter = " // "; const string GenoCallCoder::m_annot_csv_skip_text = "N/A"; const string GenoCallCoder::m_extra_codes[] = { string("NotAvailable"), string("PossibleRareAllele") }; const string GenoCallCoder::m_marker_annotation_file_cnames[] = { string("Probe Set ID"), //string("Abstract Allele Name"), string("Allele Code"), string("Alleles Design Strand"), string("Alleles Reported Strand"), string("Alleles-Alias Reported Strand") }; const string GenoCallCoder::m_version_zero_codes[] = { string(""), // [0] string(""), // [1] string(""), // [2] string(""), // [3] string(""), // [4] string(""), // [5] string("AA"), // [6] string("BB"), // [7] string("AB"), // [8] string("AB_A"), // [9] string("AB_B"), // [10] string("NN"), // [11] string(""), // [12] string(""), // [13] string(""), // [14] string("") // [15] }; const char GenoCallCoder::m_num2alpha[] = {'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'}; /** * @brief choose (i.e. 
'n choose k') * @param top - the top number, i.e. 'n' * @param bottom - the bottom number, i.e. 'k' */ int choose(int top, int bottom) { int i; int result = 1; if (top >= bottom && top >=0 && bottom >= 0) { if (bottom > top / 2) { bottom = top - bottom; } for (i = 1; i <= bottom; i++) { result = (result*top) / i; top--; } return result; } return 0; } /** * @brief Constructor * @param probeset_id - string identifying marker * @param abstract_allele - string (char) identifying allele independently of bases * @param reference_allele - string representing delimited list of design-time allele bases * @param report_allele - string representing delimited list of allele bases as they should be reported * @param allowable_cn_states = string representing delimited list of allowable copy number states for this marker * @param allele_count - number of alleles for this marker */ AlleleRecord::AlleleRecord(const std::string probeset_id, const std::vector abstract_allele, const std::vector reference_allele, const std::vector report_allele, const int allele_count, const std::vector allowable_cn_states ) : m_probeset_id(probeset_id), m_abstract_allele(abstract_allele), m_reference_allele(reference_allele), m_report_allele(report_allele), m_allele_count(allele_count), m_allowable_cn_states(allowable_cn_states) {} AlleleRecord::AlleleRecord(const std::string probeset_id, const std::vector abstract_allele, const std::vector reference_allele, const std::vector report_allele ) : m_probeset_id(probeset_id), m_abstract_allele(abstract_allele), m_reference_allele(reference_allele), m_report_allele(report_allele) { m_allele_count = 2; m_allowable_cn_states.push_back(2); } /* static bool compareAlleleRecords(AlleleRecord a, AlleleRecord b) { */ /* if (a.m_probeset_id != b.m_probeset_id) { */ /* return (a.m_probeset_id < b.m_probeset_id); */ /* } */ /* else { */ /* return (a.m_abstract_allele < b.m_abstract_allele); */ /* } */ /* } */ bool 
AlleleRecord::compareAlleleRecordsByProbesetId(AlleleRecord a, AlleleRecord b) { return (a.m_probeset_id < b.m_probeset_id); } /** * @brief constructor * @param max_allele_count - number of possible alleles for marker with highest allele count * @param data_size - string describing size of call field * @param version - numeric version of coding scheme * @param allele_delimiter - character to delimit alleles in string */ GenoCallCoder::GenoCallCoder(const int max_allele_count, const string data_size, const string version, const char allele_delimiter) { initialize(max_allele_count, data_size, version, allele_delimiter); } /** * @brief constructor * @param max_allele_count - number of possible alleles for marker with highest allele count * @param data_size - string describing size of call field * @param version - numeric version of coding scheme * @param allele_delimiter - character to delimit alleles in string * @param probeset_allele_name_table - vector of AlleleRecords (size 3 vector) containing probeset ids, allele names, and abstract alleles */ // GenoCallCoder::GenoCallCoder(const int max_allele_count, const string data_size, const string version, const char allele_delimiter, const vector probeset_allele_name_table) { // initialize(max_allele_count, data_size, version, allele_delimiter); // m_probeset_allele_name_table = probeset_allele_name_table; // vector::iterator at_start = m_probeset_allele_name_table.begin(); // vector::iterator at_end = m_probeset_allele_name_table.end(); // sort(at_start, at_end, compareAlleleRecordsByProbesetId); // } /** * @brief Constructor * @param max_allele_count - number of possible alleles for marker with highest allele count * @param data_size - string describing size of call field * @param version - numeric version of coding scheme * @param allele_delimiter - character to delimit alleles in string * @param probeset_allele_name_table - vector of (size 3 or 4) vector containing probeset ids, allele names, abstract alleles, and 
optional boolean report RevComp fields */ // GenoCallCoder(const int max_allele_count, const string data_size, const string version, const char allele_delimiter, const vector > probeset_allele_name_table) { // initialize(max_allele_count, data_size, version, allele_delimiter); // vector >::iterator at_it; // for (at_it = probeset_allele_name_table.begin(); at_it != probeset_allele_name_table.end(); at_it++) { // if (at_it->size() < 3) { // Verbose::die(0, "Improperly formated row in probeset allele name table"); // continue; // } // string probeset_id = at_it[0]; // string reference_allele = at_it[1]; // string abstract_allele = at_it[2]; // string report_revcomp; // if (at_it->size() == 4) { // report_revcomp = at_it[3]; // } // else { // report_revcomp = "0"; // } // if (probeset_id.empty()) { // Verbose::warn(0, "Empty probeset_id string in probeset_allele_name_table"); // continue; // } // if (reference_allele.empty()) { // Verbose::warn(0, "Empty reference_allele string for probeset_id '" + probeset_id + "' in probeset_allele_name_table."); // continue; // } // if (abstract_allele.empty()) { // Verbose::warn(0, "Empty abstract_allele string for probeset_id " + probeset_id + "' in probeset_allele_name_table."); // } // if (report_revcomp.empty()) { // Verbose::warn(0, "Empty report_revcomp boolean for probeset_id " + probeset_id + "' in probeset_allele_name_table."); // } // int allele_count = 0; // if (allele_count == 0) { // // alleles were not counted above // idx = abstract_allele.find(allele_delimiter); // while (idx != string::npos) { // allele_count++; // idx = abstract_allele.find(allele_delimiter, idx + 1); // } // } // // allele_count = number of delimiters + 1 // allele_count++; // for (i = 0; // m_probeset_allele_name_table = probeset_allele_name_table; // sort(at_start, at_end, compareAlleleRecordsByProbesetIdAndAbstractAllele); // } /** * @brief Constructor * @param max_allele_count - number of possible alleles for marker with highest allele count 
* @param data_size - string describing size of call field * @param version - numeric version of coding scheme * @param allele_delimiter - character to delimit alleles in string * @param marker_annotation_file - TsvFile instance of marker annotation file with columns for probeset ids, allele names, abstract alleles, and optional boolean report RevComp fields */ GenoCallCoder::GenoCallCoder(const int max_allele_count, const string data_size, const string version, const char allele_delimiter, const string marker_annotation_file) { initialize(max_allele_count, data_size, version, allele_delimiter); unsigned char zero_copy_number_call_code = abstractAlleleToGenotypeCallNum("ZeroCopyNumber"); /* HACK!!! Hard code valid call codes. */ vector > > valid_call_codes(3); vector cn0(1, zero_copy_number_call_code); vector > cn0_row(max_allele_count + 1, cn0); valid_call_codes[0] = cn0_row; vector cn1(1, m_min_translatable_code); vector > cn1_row(max_allele_count + 1, cn1); valid_call_codes[1] = cn1_row; vector cn2(0); vector > cn2_row(max_allele_count + 1, cn2); valid_call_codes[2] = cn2_row; string ab(1, allele_delimiter); ab += "A"; for (int i = 2; i <= max_allele_count; i++) { for (int j = 1; j < i; j++) { valid_call_codes[1][i].push_back(m_min_translatable_code + j); } switch (i) { case 2: ab += "B"; break; case 3: ab += "C"; break; case 4: ab += "D"; break; case 5: ab += "E"; break; case 6: ab += "F"; break; } int start = m_min_translatable_code + max_allele_count + 1; int stop = start + (max_allele_count + 1 + ((max_allele_count + 1) * (max_allele_count + 1))) / 2; for (int j = start; j < stop; j++) { // for (int j = 25; j <= 50; j++) { if (m_abstract_codes[j].find_first_not_of(ab) == m_abstract_codes[j].npos) { valid_call_codes[2][i].push_back(j); } } } TsvFile annot_csv; annot_csv.m_optAutoTrim = true; if ( annot_csv.open( marker_annotation_file ) != TSV_OK ) { Err::errAbort (marker_annotation_file + ": failed to open marker annotation file."); } int 
marker_annotation_file_cnames_count = sizeof(m_marker_annotation_file_cnames) / sizeof (m_marker_annotation_file_cnames[0]); vector annot_csv_cidxs; size_t i; for (i = 0; i < marker_annotation_file_cnames_count; i++) { int temp_idx = annot_csv.cname2cidx(0, m_marker_annotation_file_cnames[i]); if (temp_idx == TSV_ERR_NOTFOUND) { Err::errAbort ("Couldn't find expected column name '" + m_marker_annotation_file_cnames[i] + "' in marker annotation file :" + marker_annotation_file); } else { annot_csv_cidxs.push_back(temp_idx); } } // int c = 0; while (annot_csv.nextLevel(0) == TSV_OK) { // get probeset_id string probeset_id; if (annot_csv.get(0, annot_csv_cidxs[0], probeset_id) != TSV_OK) { Err::errAbort("Failure to get value from '" + m_marker_annotation_file_cnames[i] + "' field in marker annotation file:" + marker_annotation_file); } // get abstract alleles, reference, report alleles, & report aliases vector > allele_lists; for (i = 1; i < annot_csv_cidxs.size(); i++) { string col_value; if (annot_csv.get(0, annot_csv_cidxs[i], col_value) != TSV_OK) { Err::errAbort("Failure to get value from '" + m_marker_annotation_file_cnames[i] + "' field in marker annotation file:" + marker_annotation_file); } // if annot.csv row doesn't have an allele to translate, e.g. a tiling // probe used for copy number, then skip it. 
erase allele_lists as a // flag to outer while-loop to skip if (col_value == m_annot_csv_skip_text && m_marker_annotation_file_cnames[i] != "Alleles-Alias Reported Strand") { allele_lists.resize(0); break; } unsigned int allele_start = 0; size_t idx = col_value.find(m_annotation_csv_delimiter); if (idx == col_value.npos) { idx = col_value.size(); } vector allele_vec; //cout << col_value + '\n'; while (allele_start < col_value.size()) { //cout << allele_start << '-' << idx << '-' << col_value.size() << ':'; //cout << col_value.substr(allele_start, idx - allele_start) + "\n"; allele_vec.push_back(col_value.substr(allele_start, idx - allele_start)); allele_start = idx + m_annotation_csv_delimiter.size(); idx = col_value.find(m_annotation_csv_delimiter, idx + m_annotation_csv_delimiter.size()); if (idx == col_value.npos) { idx = col_value.size(); } } // if there is a report alias, then pop off vector of report // alleles in order to push on a vector of aliases in its place. // Otherwise do not store useless alias vector if (m_marker_annotation_file_cnames[i] == "Alleles-Alias Reported Strand") { if (col_value != m_annot_csv_skip_text) { allele_lists.pop_back(); // cout << allele_vec[0] << '/' << allele_vec[1] << '\n'; } else { break; } } allele_lists.push_back(allele_vec); } // if (c++ < 3) { // cout << allele_lists[0] + ':' + allele_lists[1] + ':' + allele_lists[2] + '\n'; // } // The number of alleles for a marker can be determined by the length of the delimited list of abstract alleles, since abstract alleles and delimiters each have length == 1. length_of_list == num_alleles + num_delimiters == num_alleles + (num_alleles - 1). num_alleles = (length_of_list + 1) / 2. 
// int num_alleles = (allele_lists[1].size() + 1) / 2; // skip row if none of "Allele Code", "Alleles Design Strand", "Alleles Reported Strand" is "N/A" if (allele_lists.empty()) { continue; } size_t num_alleles = allele_lists[0].size(); // vector cn_states(3); // cn_states.push_back(0); // cn_states.push_back(1); // cn_states.push_back(2); vector call_code_list(1, zero_copy_number_call_code); for (unsigned int j = 1; j < valid_call_codes.size(); j++) { call_code_list.insert(call_code_list.end(), valid_call_codes[j][num_alleles].begin(), valid_call_codes[j][num_alleles].end()); } // for (int j = 0; j < call_code_list.size(); j++) { // cout << static_cast(call_code_list[j]) << ' '; // } // cout << '\n'; AlleleRecord new_record(probeset_id, allele_lists[0], allele_lists[1], allele_lists[2], num_alleles, call_code_list); m_probeset_allele_name_table.push_back(new_record); } sort(m_probeset_allele_name_table.begin(), m_probeset_allele_name_table.end(), AlleleRecord::compareAlleleRecordsByProbesetId); // for (i = 0; i < m_probeset_allele_name_table.size(); i++) { // cout << m_probeset_allele_name_table[i].m_probeset_id + ' '; // cout << m_probeset_allele_name_table[i].m_abstract_allele + ' '; // cout << m_probeset_allele_name_table[i].m_reference_allele + ' '; // cout << m_probeset_allele_name_table[i].m_report_allele + '\n'; // } } /** * @brief Creates and populates marker translation data structure from given file path to marker annotation file * @param filepath - path to marker annotation file * @param use_report_revcomp - boolean to indicate whether report_revcomp should be utilized */ // static vector > getProbesetAlleleTranslationsFromTsvFile(string filepath, const char allele_delimiter) { // // replace annotation csv delimiter with user-supplied delimiter // string ANNOTATION_CSV_DELIMITER = "//"; // size_type acd_size = ANNOTATION_CSV_DELIMITER.size(); // size_type idx = reference_allele.find(ANNOTATION_CSV_DELIMITER); // while (idx != reference_allele.npos) { 
// reference_allele.replace(idx, acd_size, allele_delimiter); // idx = reference_allele.find(ANNOTATION_CSV_DELIMITER, idx + 1); // } // size_type idx = abstract_allele.find(ANNOTATION_CSV_DELIMITER); // while (idx != abstract_allele.npos) { // abstract_allele.replace(idx, acd_size, allele_delimiter); // idx = abstract_allele.find(ANNOTATION_CSV_DELIMITER, idx + 1); // // might as well count alleles now // } // } /** * @brief initialize * @param max_allele_count - number of possible alleles for marker with highest allele count * @param data_size - string describing size of call field * @param version - numeric version of coding scheme */ void GenoCallCoder::initialize(const int max_allele_count, const string data_size, const string version, char allele_delimiter) { // enforcing dmet parameters for now if (data_size == "UCHAR") { m_data_size_bits = 8; } else { Verbose::warn(0, "'" + data_size + "' is not an accepted data_size. 'UCHAR' is currently the only accepted data_size."); } int version_zero_codes_count = sizeof(m_version_zero_codes) / sizeof(m_version_zero_codes[0]); unsigned int extra_codes_count = sizeof(m_extra_codes) / sizeof(m_extra_codes[0]); int num2alpha_count = sizeof(m_num2alpha) / sizeof(m_num2alpha[0]); int i; m_max_extra_code = (1 << m_data_size_bits) - 1; m_abstract_codes.resize(m_max_extra_code + 1); for (i = 0; i < num2alpha_count; i++) { m_alpha2num[m_num2alpha[i]] = i; //m_alpha2num.insert(map::value_type(m_num2alpha[i], i)); } if (version == "0.0" || version == "0") { m_max_allele_count = 2; m_min_code = 6; m_min_translatable_code = 6; m_max_code = 11; m_min_extra_code = 1 << m_data_size_bits; m_version = "0"; for (i = 0; i < version_zero_codes_count; i++) { m_abstract_codes[i] = m_version_zero_codes[i]; // if (! 
m_version_zero_codes[i].empty()) { // m_alleles[m_version_zero_codes[i]] = i; // } } if (allele_delimiter != '\0') { allele_delimiter = '\0'; Verbose::warn(0, "Ignoring given allele_delimiter '" + string(1,allele_delimiter) + "' -- arbitrary allele delimter is not supported for version 0."); // for (i = 6; i <= 8; i++) { // string new_allele_string; // new_allele_string = m_abstract_codes[i][0]; // //cout << "init: " << new_allele_string << '\n'; // for (int j = 1; j < m_abstract_codes[i].size(); j++) { // new_allele_string += allele_delimiter; // new_allele_string += m_abstract_codes[i][j]; // //cout << "adding: " << allele_delimiter << m_abstract_codes[i][j] << '\n'; // } // //cout << "done: " << new_allele_string << '\n'; // m_abstract_codes[i].assign(new_allele_string); // } } } else { // assert(max_allele_count == 6); assert(version == "1.0"); if (version == string("1.0")) { m_min_extra_code = 254; // m_max_code = calculateMaxCall(max_allele_count, data_size); } assert(m_max_extra_code - m_min_extra_code + 1 <= extra_codes_count); m_max_allele_count = max_allele_count; m_min_code = version_zero_codes_count; m_version = version; // index indicating where the translatable alleles begin. First 2 codes // are reserved for 'NoCall' and 'ZeroCopyNumber' (added below). 
m_min_translatable_code = m_min_code + 2; // (add 1 for 'n' allele) -- adding 'n' allele allows for half/partial calls int effective_max_allele_count = max_allele_count + 1; // calculate maximum possible allele code number // the value is given by: // argmax{CN: sum_{j=1}^{CN} ( sum_{i=1}^{min(E, j)} ( choose(E, i) * choose(j-1,i-1) ) ) <= Space} // where 'E' = effective_max_allele_count, and 'Space' = the amount of // space in the array available for storing allele codes unsigned int candidate_max = m_min_translatable_code + max_allele_count; if (candidate_max > m_max_extra_code - extra_codes_count) { Err::errAbort("Value for option 'max_allele_count':" + ToStr(max_allele_count) + " is to large for given 'data_size': " + data_size); } int max_cn = 1; while (candidate_max < m_max_extra_code - extra_codes_count) { m_max_code = candidate_max; max_cn++; int bound = max_cn; if (bound > effective_max_allele_count) { bound = effective_max_allele_count; } for (int allele_idx = 1; allele_idx <= bound; allele_idx++) { candidate_max += choose(effective_max_allele_count, allele_idx) * choose(max_cn - 1, allele_idx - 1); } } max_cn--; // fill out m_abstract_codes[] with abstract alleles. // this is a bootstrap method. allele strings for each copy number are // built upon the allele strings for the previous copy number. First the // array is initialized with CN=1 alleles: A, B, C, ... n. CN=2 alleles // are generated by iteratively prepending single allele letters to the // previous CN=1 alleles. E.g. 'A' is prepended to all CN=1 alleles // starting with the A allele to make AA, AB, AC, ... An, and 'B' is // prepended to all of the alleles starting with the B allele: BB, BC, ..., // Bn, and 'C' is prepended to all the alleles starting with the C allele, // and so on. The hard part is in the index bookkeeping. For each // prepending allele letter ('A','B', etc.), an index is kept of where in // the array that letter needs to start prepending (start_idxs[]). 
An // index is kept of where to stop prepending (tail_stop_idx). After each // allele letter completes a series of prepending, it's start index is // updated so that it will begin prepending in the correct place for the // next copynumber. // initialize for haploid alleles int m_abstract_codes_end = m_min_translatable_code; vector start_idxs(effective_max_allele_count, m_min_translatable_code - 1); vector prepend_alleles(effective_max_allele_count); int i; for (i = 0; i < max_allele_count; i++) { prepend_alleles[i] = string(1, m_num2alpha[i]); } prepend_alleles[i] = 'n'; for (int cn = 1; cn <= max_cn; cn++) { int tail_stop_idx = m_abstract_codes_end; for (i = 0; i < start_idxs.size(); i++) { // save the start_idx for the next copy number int next_start_idx = m_abstract_codes_end; for (int curr_idx = start_idxs[i]; curr_idx < tail_stop_idx; curr_idx++) { m_abstract_codes[m_abstract_codes_end++] = prepend_alleles[i] + m_abstract_codes[curr_idx]; } start_idxs[i] = next_start_idx; } } // add non-translatable codes m_abstract_codes[m_min_code] = "NoCall"; // Unable to genotype m_abstract_codes[m_min_code + 1] = "ZeroCopyNumber"; //m_abstract_codes[17] = "0"; // add extra codes for this version for (i = 0; i < extra_codes_count; i++) { m_abstract_codes[m_max_extra_code - i] = m_extra_codes[i]; } // insert allele delimiter if one was provided if (allele_delimiter != '\0') { for (i = m_min_translatable_code; i <= m_max_code; i++) { string new_allele_string; new_allele_string = m_abstract_codes[i][0]; for (size_t j = 1; j < m_abstract_codes[i].size(); j++) { new_allele_string += allele_delimiter; new_allele_string += m_abstract_codes[i][j]; } m_abstract_codes[i].assign(new_allele_string); } } } // make reverse map of abstract allele codes for encoding for (i = m_min_code; i <= m_max_code; i++) { m_alleles[m_abstract_codes[i]] = i; } for (i = m_min_extra_code; i <= m_max_extra_code; i++) { m_alleles[m_abstract_codes[i]] = i; } m_allele_delimiter = allele_delimiter; } /** * 
@brief Retrieve vector of probeset ids in allele name table */ vector GenoCallCoder::getProbesetIds() { vector result; vector::iterator it; for (it = m_probeset_allele_name_table.begin(); it != m_probeset_allele_name_table.end(); it++) { result.push_back(it->m_probeset_id); } return result; } /** * @brief Retrieve vector of genotype call codes allowable for given probeset_id -- based on probeset's allele count and allowable copy number states. * @param probeset_id - id of probeset to get codes for */ vector GenoCallCoder::getValidGenotypeCallCodes(std::string probeset_id) { vector temp(0); AlleleRecord key(probeset_id, temp, temp, temp); vector::iterator result = lower_bound(m_probeset_allele_name_table.begin(), m_probeset_allele_name_table.end(), key, AlleleRecord::compareAlleleRecordsByProbesetId); /* HACK!!! The field allowable_cn_states has been highjacked for some other expedient purpose. */ return result->m_allowable_cn_states; } /** * @brief Decode from call code number to allele string * @param call_code - number to decode */ //template string GenoCallCoder::decodeCall(const T1 call_code) { string GenoCallCoder::genotypeCallNumToAbstractAllele(const unsigned char call_code) { if (call_code < m_abstract_codes.size()) { return m_abstract_codes[call_code]; } else { Verbose::warn(0, "Call code '" + ToStr(call_code) + "' is out of bounds for size " + m_data_size); return string(""); } } /** * @brief Decode from call code number to reference allele name string for * given probeset id * * @param probeset_id - id of probeset to decode * @param call_code - number to decode */ string GenoCallCoder::genotypeCallNumToReferenceAllele(const string probeset_id, const unsigned char call_code) { return genotypeCallNumToAllele(probeset_id, call_code, "ref"); } /** * @brief Translate allele name from one type to another {absract, reference, * report} for given probeset id and allele type * * @param probeset_id - id of probeset to decode * @param abstract_allele - abstract 
allele to decode * @param input_type - flag to indicate input type of allele name {"abs","ref","rep"} * @param output_type - flag to indicate output type of allele name {"abs","ref","rep"} */ string GenoCallCoder::alleleNameConvert(const string probeset_id, const string input_allele_string, const string input_type, const string output_type) { string allele_name_string; //vector::iterator at_start = m_probeset_allele_name_table.begin(); //vector::iterator at_end = m_probeset_allele_name_table.end(); vector temp(0); AlleleRecord key(probeset_id, temp, temp, temp); //cout << probeset_id + '*' + abstract_allele << '\n'; vector::iterator result = lower_bound(m_probeset_allele_name_table.begin(), m_probeset_allele_name_table.end(), key, AlleleRecord::compareAlleleRecordsByProbesetId); if (result == m_probeset_allele_name_table.end()) { Verbose::warn(0, "Cannot find probeset '" + probeset_id + "' in supplied probeset allele table"); } else { vector input_allele_vec; if (input_type == "abs") { input_allele_vec = result->m_abstract_allele; } else if (input_type == "ref") { input_allele_vec = result->m_reference_allele; } else if (input_type == "rep") { input_allele_vec = result->m_report_allele; } else { Err::errAbort("Value for parameter 'input_type':" + input_type + " is not one of {'abstract', 'reference', 'report'}"); } vector output_allele_vec; if (output_type == "abs") { output_allele_vec = result->m_abstract_allele; } else if (output_type == "ref") { output_allele_vec = result->m_reference_allele; } else if (output_type == "rep") { output_allele_vec = result->m_report_allele; } else { Err::errAbort("Value for parameter 'output_type':" + output_type + " is not one of {'abstract', 'reference', 'report'}"); } //cout << result->m_probeset_id + '|' + result->m_abstract_allele + '|' + result->m_reference_allele + '|' + result->m_report_allele + '\n'; //cout << result->m_probeset_id + '|' + result->m_abstract_allele[0] + '|' + result->m_reference_allele[0] + '|' + 
result->m_report_allele[0] + '\n'; int allele_start = 0; string query; while (allele_start < input_allele_string.size()) { int delimiter_pos = input_allele_string.find(m_allele_delimiter, allele_start); if (delimiter_pos == input_allele_string.npos) { delimiter_pos = input_allele_string.size(); } query = input_allele_string.substr(allele_start, delimiter_pos - allele_start); // cout << allele_start << '-' << delimiter_pos << '-' << input_allele_string.size() << ':' << query + "\n"; if (input_type == "abs" && query == string(1, m_abstract_nocall_char)) { // For now, using 'NoCall' text allele_name_string.append(m_abstract_codes[16]); } else { int i = 0; for (i = 0; i < input_allele_vec.size(); i++) { // cout << *aas_it << '-'; if (query == input_allele_vec[i]) { allele_name_string.append(output_allele_vec[i]); // append delimiter, if there is another allele to lookup if (delimiter_pos < input_allele_string.size()) { allele_name_string.append(string(1, m_allele_delimiter)); } break; } } if (i >= input_allele_vec.size()) { Verbose::warn(0, "Cannot find listing for '" + input_type + "' allele '" + query + "' for probeset: " + probeset_id); } } // set allele_start to position beyond delimiter allele_start = delimiter_pos + 1; } // cout << '\n'; } return allele_name_string; } /** * @brief Decode from call code number to report allele string for * given probeset id * * @param probeset_id - id of probeset to decode * @param call_code - number to decode */ string GenoCallCoder::genotypeCallNumToReportAllele(const string probeset_id, const unsigned char call_code) { return genotypeCallNumToAllele(probeset_id, call_code, "rep"); } /** * @brief Decode from call code number to allele string for * given probeset id and allele type * * @param probeset_id - id of probeset to decode * @param call_code - number to decode * @param allele_type - type of allele string to return */ string GenoCallCoder::genotypeCallNumToAllele(const string probeset_id, const unsigned char call_code, 
const string allele_type) { string allele_name_string; string abstract_allele_string = genotypeCallNumToAbstractAllele(call_code); if (call_code >= m_min_translatable_code && call_code <= m_max_code) { allele_name_string = alleleNameConvert(probeset_id, abstract_allele_string, "abs", allele_type); } else if ((call_code >= m_min_code && call_code < m_min_translatable_code) || (call_code >= m_min_extra_code && call_code <= m_max_extra_code) ) { allele_name_string = abstract_allele_string; } return allele_name_string; } /** * @brief Translate abstract allele to allele name for given * probeset id and allele type * * @param probeset_id - id of probeset to decode * @param abstract_allele - abstract allele to decode */ string GenoCallCoder::referenceAlleleToReportAllele(const string probeset_id, const string reference_allele) { return alleleNameConvert(probeset_id, reference_allele, "ref", "rep"); } /** * @brief Translate abstract allele to design allele name for given * probeset id * * @param probeset_id - id of probeset to decode * @param abstract_allele - abstract allele to decode */ string GenoCallCoder::abstractAlleleToReferenceAllele(const string probeset_id, const string abstract_allele) { return alleleNameConvert(probeset_id, abstract_allele, "abs", "ref"); } /** * @brief Translate abstract allele to reporting allele name for given * probeset id * * @param probeset_id - id of probeset to decode * @param abstract_allele - abstract allele to decode */ string GenoCallCoder::abstractAlleleToReportAllele(const string probeset_id, const string abstract_allele) { return alleleNameConvert(probeset_id, abstract_allele, "abs", "rep"); } /** * @brief Encode from allele string to numeric call code * @param allele_string - string of allele calls to encode */ //template T1 GenoCallCoder::encodeCall(string allele_string) { unsigned char GenoCallCoder::abstractAlleleToGenotypeCallNum(const string allele_string) { if (m_alleles.count(allele_string)) { return 
m_alleles[allele_string]; } else { Verbose::warn(0, "Allele string '" + allele_string + "' is not valid"); return 0; } } /** * @brief Takes vector of abstract allele characters encoded as * integers and returns genotype call number * @param allele_string - string of allele calls to encode */ unsigned char GenoCallCoder::abstractAlleleIntegersToGenotypeCallNum(const vector abstract_allele_ints) { string abstract_allele_string; int alphamap_size = sizeof(m_num2alpha) / sizeof(m_num2alpha[0]); //cout << abstract_allele_ints.size()<< ':' << m_num2alpha[abstract_allele_ints[0]] << '\n'; for (int i = 0; i < abstract_allele_ints.size(); i++) { if (abstract_allele_ints[i] >= 0 && abstract_allele_ints[i] < alphamap_size) { abstract_allele_string.append(1, m_num2alpha[abstract_allele_ints[i]]); } else { Verbose::warn(0, "Abstract allele integer '" + ToStr(abstract_allele_ints[i]) + "' is out of allele integer range: [0," + ToStr(alphamap_size) + ")"); return 0; } } return abstractAlleleToGenotypeCallNum(abstract_allele_string); } /** * @brief Decode vector of call code numbers to vector of allele strings * @param call_codes - vector of numbers to decode */ //template vector GenoCallCoder::decodeCallVec(const vector &call_codes) { vector GenoCallCoder::genotypeCallNumVecToAbstractAlleleVec(const vector &call_codes) { int i; vector decoded_calls(call_codes.size()); for (i = 0; i < call_codes.size(); i++) { if (call_codes[i] < m_abstract_codes.size()) { decoded_calls[i] = m_abstract_codes[call_codes[i]]; } else { Verbose::warn(0, "Call code '" + ToStr(call_codes[i]) + "' is out of bounds for size " + m_data_size); decoded_calls[i] = string(""); } } return decoded_calls; } /** * @brief Encode from vector of allele strings to vector of numeric call codes * @param allele_string - vector of allele call strings to encode */ //template vector GenoCallCoder::encodeCallVec(const vector &allele_string) { vector GenoCallCoder::abstractAlleleVecToGenotypeCallNumVec(const vector 
&allele_string) { int i; vector encodedAlleles(allele_string.size()); for (i = 0; i < allele_string.size(); i++) { if (m_alleles.count(allele_string[i])) { encodedAlleles[i] = m_alleles[allele_string[i]]; } else { Verbose::warn(0, "Allele string '" + allele_string[i] + "' is not valid"); encodedAlleles[i] = 0; } } return encodedAlleles; } /** * @brief Retrieve a vector of allele call strings indexed by numeric call code */ // vector GenoCallCoder::getDecodeVector() { // const vector table = m_abstract_codes; // return table; // } /** * @brief boolean indicating if code is within proper range for given instance * @param code - numeric call code to test */ bool GenoCallCoder::isValidCallNum(const unsigned char call_code) { return ((call_code >= m_min_code && call_code <= m_max_code) || (call_code >= m_min_extra_code && call_code <= m_max_extra_code)); } /** * @brief boolean indicating if code is within proper range for given instance * @param code - numeric call code to test */ bool GenoCallCoder::isValidAbstractAllele(const string allele) { return m_alleles.count(allele); } /** * @brief boolean indicating if call code corresponds to a homozygous call. * @param call_code - numeric call code to test */ bool GenoCallCoder::isHom(const unsigned char call_code) { if (call_code >= m_min_translatable_code && call_code <= m_max_code) { string abs_allele = genotypeCallNumToAbstractAllele(call_code); char last_allele = abs_allele[abs_allele.size()-1]; //cout << abs_allele + ':' + first_allele + '\n'; if (m_alpha2num.count(last_allele) == 1) { //cout << "here1\n"; string search_string(1, last_allele); search_string.append(string(1, m_allele_delimiter)); if (abs_allele.find_first_not_of(search_string, 0) == string::npos) { //cout << "here2\n"; return true; } } } // ZeroCopyNumber is hom else if (call_code == 17) { return true; } return false; } /** * @brief boolean indicating if call code corresponds to a heterozygous call. false if call code corresponds to partial call. 
* @param call_code - numeric call code to test */ bool GenoCallCoder::isHet(const unsigned char call_code) { if (call_code >= m_min_translatable_code && call_code <= m_max_code) { string abs_allele = genotypeCallNumToAbstractAllele(call_code); char last_allele = abs_allele[abs_allele.size()-1]; if (m_alpha2num.count(last_allele) == 1) { string search_string(1, last_allele); search_string.append(string(1, m_allele_delimiter)); // cout << search_string << ':' << abs_allele << '\n'; if (abs_allele.find_first_not_of(search_string, 0) != string::npos) { return true; } } } return false; } affxparser/src/fusion/calvin_files/utils/src/GenoCallCoder.h0000644000175200017520000003544614516003651025243 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

/**
 * @file   GenoCallCoder.h
 * @author Ray Wheeler
 * @date   Tue May 27 12:34:42 PDT 2008
 *
 * @brief  Genotype call en/decoder
 */

// NOTE(review): in this copy of the file the #include targets and the
// template argument lists (e.g. of std::vector / std::map) are missing --
// restore them from the pristine source before building.
// NOTE(review): the include guard name begins with an underscore followed by
// an uppercase letter, a form reserved for the implementation in C++.
#ifndef _CALLCODER_H_
#define _CALLCODER_H_

//
#include
#include
#include
#include

/**
 * AlleleRecord - one row of the probeset allele name table: the abstract,
 * reference and report allele names of a single probeset.
 */
class AlleleRecord {

public:

  /**
   * @brief Constructor (documentation of the superseded, string-based
   *        constructors kept below as commented-out code)
   * @param probeset_id - string identifying marker
   * @param abstract_allele - string (char) identifying allele independently of bases
   * @param reference_allele - string representing delimited list of design-time allele bases
   * @param report_allele - string representing delimited list of allele bases as they should be reported
   * @param allele_count - number of alleles for this marker
   * @param allowable_cn_states - string representing delimited list of allowable copy number states for this marker
   */
  /*   AlleleRecord(const std::string probeset_id, */
  /*         const std::string abstract_allele, */
  /*         const std::string reference_allele, */
  /*         const std::string report_allele, */
  /*         const int allele_count, */
  /*         const std::vector allowable_cn_states */
  /*         ) : */
  /*     m_probeset_id(probeset_id), */
  /*     m_abstract_allele(abstract_allele), */
  /*     m_reference_allele(reference_allele), */
  /*     m_report_allele(report_allele), */
  /*     m_allele_count(allele_count), */
  /*     m_allowable_cn_states(allowable_cn_states) */
  /*     {} */

  /*   AlleleRecord(const std::string probeset_id, */
  /*         const std::string abstract_allele, */
  /*         const std::string reference_allele, */
  /*         const std::string report_allele */
  /*         ) : */
  /*     m_probeset_id(probeset_id), */
  /*     m_abstract_allele(abstract_allele), */
  /*     m_reference_allele(reference_allele), */
  /*     m_report_allele(report_allele) */
  /*     { */
  /*       m_allele_count = 2; */
  /*       m_allowable_cn_states.push_back(2); */
  /*     } */

  /*   std::string m_probeset_id; */
  /*   std::string m_abstract_allele; */
  /*   std::string m_reference_allele; */
  /*   std::string m_report_allele; */
  /*   int m_allele_count; */
  /*   std::vector m_allowable_cn_states; */

  /// Full constructor: the three allele name lists plus the allele count and
  /// the allowable copy number states of the marker.
  AlleleRecord(const std::string probeset_id,
               const std::vector abstract_allele,
               const std::vector reference_allele,
               const std::vector report_allele,
               const int allele_count,
               const std::vector allowable_cn_states
               );

  /// Constructor taking only the probeset id and the three allele name lists
  /// (the form used to build lookup keys).
  AlleleRecord(const std::string probeset_id,
               const std::vector abstract_allele,
               const std::vector reference_allele,
               const std::vector report_allele
               );

  /// id of the probeset (marker) this record describes
  std::string m_probeset_id;
  /// abstract allele names; translation pairs entries of the three lists by position
  std::vector m_abstract_allele;
  /// reference (design-time) allele names
  std::vector m_reference_allele;
  /// report allele names
  std::vector m_report_allele;
  /// number of alleles for this marker
  int m_allele_count;
  /// allowable copy number states for this marker
  /// NOTE(review): GenoCallCoder::getValidGenotypeCallCodes() returns this
  /// field, and a comment there says it has been repurposed -- confirm what
  /// it actually holds.
  std::vector m_allowable_cn_states;

  /**
   * @brief Function for sorting/searching a vector of allele records sorted by probeset_id
   * @param a - one of two records to compare for sorting
   * @param b - one of two records to compare for sorting
   */
  /*   static bool compareAlleleRecords(AlleleRecord a, AlleleRecord b) { */
  /*     if (a.m_probeset_id != b.m_probeset_id) { */
  /*       return (a.m_probeset_id < b.m_probeset_id); */
  /*     } */
  /*     else { */
  /*       return (a.m_abstract_allele < b.m_abstract_allele); */
  /*     } */
  /*   } */

  /// Ordering predicate used with lower_bound() on the allele name table.
  static bool compareAlleleRecordsByProbesetId(AlleleRecord a, AlleleRecord b);

};

/**
 *  GenoCallCoder - Class for encoding and decoding numeric genotype call
 *  codes to and from allele strings
 */
class GenoCallCoder {

public:

  //typedef unsigned char code_t;

  //   /**
  //    * @brief Default constructor
  //    */
  //   GenoCallCoder();

  /**
   * @brief Constructor
   * @param max_allele_count - number of possible alleles for marker with highest allele count
   * @param data_size - string describing size of call field
   * @param version - numeric version of coding scheme
   * @param allele_delimiter - character to delimit alleles in string
   */
  GenoCallCoder(const int max_allele_count=2, const std::string data_size="UCHAR", const std::string version="0", char allele_delimiter='\0');

  /**
   * @brief Constructor (disabled)
   * @param max_allele_count - number of possible alleles for marker with highest allele count
   * @param data_size - string describing size of call field
   * @param version - numeric version of coding scheme
   * @param allele_delimiter - character to delimit alleles in string
   * @param probeset_allele_name_table - vector of (size 3 or 4) vector containing probeset ids, allele names, abstract alleles, and optional boolean report RevComp fields
   */
  //GenoCallCoder(const int max_allele_count, const std::string data_size, const std::string version, const char allele_delimiter, const std::vector > probeset_allele_name_table);

  /**
   * @brief Constructor
   * @param max_allele_count - number of possible alleles for marker with highest allele count
   * @param data_size - string describing size of call field
   * @param version - numeric version of coding scheme
   * @param allele_delimiter - character to delimit alleles in string
   * @param marker_annotation_file - marker annotation file (passed as a string) with columns for probeset ids, allele names, abstract alleles, and optional boolean report RevComp fields
   */
  GenoCallCoder(const int max_allele_count, const std::string data_size, const std::string version, char allele_delimiter, const std::string marker_annotation_file);

  /**
   * @brief Creates and populates marker translation data structure from given file path to marker annotation file (disabled)
   * @param filepath - path to marker annotation file
   * @param use_report_revcomp - boolean to indicate whether report_revcomp should be utilized
   */
  //  static std::vector > getProbesetAlleleTranslationsFromTsvFile(std::string filepath, boolean use_report_revcomp);

  /**
   * @brief Populate member data structures from given input
   * @param max_allele_count - number of possible alleles for marker with highest allele count
   * @param data_size - string describing size of call field
   * @param version - numeric version of coding scheme
   * @param allele_delimiter - character to delimit alleles in string
   */
  void initialize(const int max_allele_count, const std::string data_size, const std::string version, char allele_delimiter);

  /**
   * @brief Retrieve vector of probeset ids in allele name table
   */
  std::vector getProbesetIds();

  /**
   * @brief Retrieve vector of genotype call codes allowable for given probeset_id -- based on probeset's allele count and allowable copy number states.
   * @param probeset_id - id of probeset to get codes for
   */
  std::vector getValidGenotypeCallCodes(std::string probeset_id);

  /**
   * @brief Decode from call code number to abstract allele string
   * @param call_code - number to decode
   */
  // alternate names:
  //  std::string decodeCall(const unsigned char call_code);
  std::string genotypeCallNumToAbstractAllele(const unsigned char call_code);

  /**
   * @brief Decode from call code number to design allele name string for
   * given probeset id
   *
   * @param probeset_id - id of probeset to decode
   * @param call_code - number to decode
   */
  std::string genotypeCallNumToReferenceAllele(const std::string probeset_id, const unsigned char call_code);

  /**
   * @brief Decode from call code number to reporting allele name string for
   * given probeset id
   *
   * @param probeset_id - id of probeset to decode
   * @param call_code - number to decode
   */
  std::string genotypeCallNumToReportAllele(const std::string probeset_id, const unsigned char call_code);

  /**
   * @brief Translate design allele name to reporting allele name for given probeset id
   *
   * @param probeset_id - id of probeset to decode
   * @param reference_allele - string of design alleles to be converted to report alleles
   */
  std::string referenceAlleleToReportAllele(const std::string probeset_id, const std::string reference_allele);

  /**
   * @brief Translate abstract allele to design allele name for given
   * probeset id
   *
   * @param probeset_id - id of probeset to decode
   * @param abstract_allele - abstract allele to decode
   */
  std::string abstractAlleleToReferenceAllele(const std::string probeset_id, const std::string abstract_allele);

  /**
   * @brief Translate abstract allele to reporting allele name for given
   * probeset id
   *
   * @param probeset_id - id of probeset to decode
   * @param abstract_allele - abstract allele to decode
   */
  std::string abstractAlleleToReportAllele(const std::string probeset_id, const std::string abstract_allele);

  //  std::string alleleNameToAllele(const std::string probeset_id, const std::string allele_name);

  /**
   * @brief Encode from allele name string to call code number for
   * given probeset id (disabled)
   *
   * @param probeset_id - id of probeset to encode
   * @param allele_name - allele name to encode
   */
  //unsigned char alleleNameToGenotypeCallNum(const std::string probeset_id, const std::string allele_name);

  // Not sure how to handle probeset names with these methods
  /*   std::vector CallVecToAlleleNameVec(const std::vector &call_codes); */
  /*   std::vector AlleleVecToAlleleNameVec(const std::vector &alleles); */
  /*   std::vector AlleleNameVecToAlleleVec(const std::vector &allele_names); */
  /*   std::vector AlleleNameVecToCallVec(const std::vector &allele_names); */

  /**
   * @brief Encode from allele string to numeric call code
   * @param allele_string - string of allele calls to encode
   */
  unsigned char abstractAlleleToGenotypeCallNum(const std::string allele_string);

  /**
   * @brief Takes vector of abstract allele characters encoded as
   * integers and returns genotype call number
   * @param abstract_allele_ints - abstract alleles encoded as integers
   */
  unsigned char abstractAlleleIntegersToGenotypeCallNum(const std::vector abstract_allele_ints);

  /**
   * @brief Decode vector of call code numbers to vector of allele strings
   * @param call_codes - vector of numbers to decode
   */
  std::vector genotypeCallNumVecToAbstractAlleleVec(const std::vector &call_codes);

  /**
   * @brief Encode from vector of allele strings to vector of numeric call codes
   * @param allele_string - vector of allele call strings to encode
   */
  std::vector abstractAlleleVecToGenotypeCallNumVec(const std::vector &allele_string);

  /**
   * @brief Retrieve a vector of abstract allele calls indexed by numeric call code
   *        (returns a copy of the internal table)
   */
  std::vector getAbstractAlleleVector() {std::vector rtn_vec(m_abstract_codes); return rtn_vec;}

  /**
   * @brief boolean indicating if code is within proper range for given GenoCallCoder instance
   * @param call_code - numeric call code to test
   */
  bool isValidCallNum(const unsigned char call_code);

  /**
   * @brief boolean indicating if allele string is within proper set of possible alleles for given GenoCallCoder instance
   * @param allele - string of alleles to test
   */
  bool isValidAbstractAllele(const std::string allele);

  /**
   * @brief boolean indicating if call code corresponds to a homozygous call
   * @param call_code - numeric call code to test
   */
  bool isHom(const unsigned char call_code);

  /**
   * @brief boolean indicating if call code corresponds to a heterozygous call
   * @param call_code - numeric call code to test
   */
  bool isHet(const unsigned char call_code);

private:

  /// annotation csv file intra-column delimiter
  static const std::string m_annotation_csv_delimiter;

  /// extra codes
  static const std::string m_extra_codes[];

  /// list of columns to grab from marker annotation file
  static const std::string m_marker_annotation_file_cnames[];

  /// version zero codes
  static const std::string m_version_zero_codes[];

  /// character to represent a nocall when making partial calls (v1+)
  static const char m_abstract_nocall_char = 'n';

  /// string in annotation csv file indicating that there are no alleles to decode
  static const std::string m_annot_csv_skip_text;

  /// alphabet map: allele integer -> allele letter
  static const char m_num2alpha[];

  /// number of alleles for marker with highest allele count
  int m_max_allele_count;

  /// string arg indicating size of call code field
  std::string m_data_size;

  /// number of bits used for encoding
  int m_data_size_bits;

  /// version dependent min call code
  unsigned int m_min_code;

  /// version dependent min translatable call code
  unsigned int m_min_translatable_code;

  /// data_size dependent max call code
  unsigned int m_max_code;

  /// version dependent min call code for extra codes
  unsigned int m_min_extra_code;

  /// version dependent max call code for extra codes
  unsigned int m_max_extra_code;

  /// version
  std::string m_version;

  /// character to delimit alleles in allele string
  char m_allele_delimiter;

  /// abstract allele string for each call code, indexed by call code
  /// (hard-coded codes, for now)
  std::vector m_abstract_codes;

  /// reverse map from abstract allele string to call code
  /// (hard-coded code map, for now)
  std::map m_alleles;

  /// table relating abstract alleles to allele names for each marker;
  /// searched with lower_bound(), so presumably kept sorted by probeset id
  std::vector m_probeset_allele_name_table;

  /// legacy code map
  //  map m_legacy_alleles;

  /// alphabet map to integers (keyed by allele letter)
  std::map m_alpha2num;

  /// Decode a call code to the allele names of the requested type for a probeset.
  std::string genotypeCallNumToAllele(const std::string probeset_id, const unsigned char call_code, const std::string allele_type);

  /// Translate a delimited allele string between the "abs", "ref" and "rep" name types.
  std::string alleleNameConvert(const std::string probeset_id, const std::string input_allele_string, const std::string input_type, const std::string output_type);

};

#endif /* _CALLCODER_H_ */
affxparser/src/fusion/calvin_files/utils/src/StringUtils.cpp0000644000175200017520000001125114516003651025410 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/utils/src/StringUtils.h" // #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // ignore deprecated functions warning #endif using namespace affymetrix_calvin_utilities; /* * Trim chTarget from the left of string s. */ void StringUtils::STLTrimLeft(std::string& s, char chTarget) { std::string::size_type n = s.find_first_not_of( chTarget ); if( n == std::string::npos ) return; s = s.substr( n ); } /* * Trim space characters (' ') from the left of string s. */ void StringUtils::STLTrimLeft(std::string& s) { STLTrimLeft( s, ' ' ); } /* * Trim chTarget from the right of string s. */ void StringUtils::STLTrimRight(std::string& s, char chTarget) { std::string::size_type n = s.find_last_not_of( chTarget ); if( n == std::string::npos ) s.clear(); else s = s.substr( 0, n+1 ); } /* * Trim space characters (' ') from the right of string s. */ void StringUtils::STLTrimRight(std::string& s) { STLTrimRight( s, ' ' ); } /* * Trim chTarget from the left of string s. */ void StringUtils::STLTrimLeft(std::wstring& s, wchar_t chTarget) { std::wstring::size_type n = s.find_first_not_of( chTarget ); if( n == std::wstring::npos ) return; s = s.substr( n ); } /* * Trim space characters (' ') from the left of string s. */ void StringUtils::STLTrimLeft(std::wstring& s) { STLTrimLeft( s, ' ' ); } /* * Trim chTarget from the right of string s. */ void StringUtils::STLTrimRight(std::wstring& s, wchar_t chTarget) { std::wstring::size_type n = s.find_last_not_of( chTarget ); if( n == std::wstring::npos ) s.clear(); else s = s.substr( 0, n+1 ); } /* * Trim space characters (' ') from the right of string s. 
*/ void StringUtils::STLTrimRight(std::wstring& s) { STLTrimRight( s, ' ' ); } /* * Convert wide character string to a multi-byte character string. */ std::string StringUtils::ConvertWCSToMBS(const std::wstring& source) { char* szSource = new char[ source.length()+1 ]; wcstombs( szSource, source.c_str(), source.length()+1 ); std::string result = szSource; delete [] szSource; return result; } /* * Convert multi-byte character string to a wide character character string. */ std::wstring StringUtils::ConvertMBSToWCS(const std::string& source) { wchar_t* szSource = new wchar_t[ source.length()+1 ]; mbstowcs( szSource, source.c_str(), source.length()+1 ); std::wstring result = szSource; delete [] szSource; return result; } /* * Convert an integer to a string. */ std::wstring StringUtils::ToString(int value, int digits, wchar_t fill) { #if defined(_GLIBCPP_USE_WCHAR_T) || defined(_GLIBCXX_USE_WCHAR_T) || defined(_MSC_VER) std::wostringstream str; str << std::setw(digits) << std::setfill(fill) << value; return str.str(); #else throw "No implementation"; #endif } /* * Split a string using the separator as the delimiter */ std::vector StringUtils::Split(const std::string &inputString, const std::string &separator) { std::vector tokens; size_t substrBegin = 0; for (;;) { size_t substrEnd = inputString.find (separator, substrBegin); if (substrEnd == std::string::npos) { // No more ',' - save what's left, quit. std::string subString = inputString.substr (substrBegin); // Avoid returning a null string from a terminating ',' or an empty inputString. if (! subString.empty()) tokens.push_back (subString); break; } // Avoid null strings from an initial ',' or ',,'. 
if (substrEnd != substrBegin) tokens.push_back (inputString.substr (substrBegin, substrEnd - substrBegin) ); // Continue following the ',' substrBegin = substrEnd + 1; } return tokens; } affxparser/src/fusion/calvin_files/utils/src/StringUtils.h0000644000175200017520000001534314516003651025063 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef __STRINGUTILS_H__INCLUDED #define __STRINGUTILS_H__INCLUDED /*! \file StringUtils.h This file provides string utilities. */ #include #include #include // #ifdef __MINGW32__ #define USE_OLD_SWPRINTF #endif namespace affymetrix_calvin_utilities { /*! This class provides utility functions for strings. */ class StringUtils { public: /*! Trim chTarget from the left of string s. * @param s String from which to remove characters. * @param chTarget Character to trim. */ static void STLTrimLeft(std::string& s, char chTarget); /*! Trim space characters (' ') from the left of string s. * @param s String from which to remove characters. */ static void STLTrimLeft(std::string& s); /*! Trim chTarget from the right of string s. * @param s String from which to remove characters. * @param chTarget Character to trim. 
*/ static void STLTrimRight(std::string& s, char chTarget); /*! Trim space characters (' ') from the right of string s. * @param s String from which to remove characters. */ static void STLTrimRight(std::string& s); /*! Trim chTarget from the left of string s. * @param s String from which to remove characters. * @param chTarget Character to trim. */ static void STLTrimLeft(std::wstring& s, wchar_t chTarget); /*! Trim space characters (' ') from the left of string s. * @param s String from which to remove characters. */ static void STLTrimLeft(std::wstring& s); /*! Trim chTarget from the right of string s. * @param s String from which to remove characters. * @param chTarget Character to trim. */ static void STLTrimRight(std::wstring& s, wchar_t chTarget); /*! Trim space characters (' ') from the right of string s. * @param s String from which to remove characters. */ static void STLTrimRight(std::wstring& s); /*! Convert wide character string to a multi-byte character string. * @param source Wide character string to convert. * @return Converted multi-byte character string. */ static std::string ConvertWCSToMBS(const std::wstring& source); /*! Convert multi-byte character string to a wide character character string. * @param source Multi-byte character string to convert. * @return Converted wide character character string. */ static std::wstring ConvertMBSToWCS(const std::string& source); /*! Convert an integer to a string. * @param value The integer * @param digits The number of digits for the resulting string * @param fill The value to fill in empty spaces. */ static std::wstring ToString(int value, int digits, wchar_t fill=L'0'); /* * Split a string using the separator as the delimiter * @param inputString The string to split * @param separator The delimiter */ static std::vector Split(const std::string &inputString, const std::string &separator); }; } /*! Formats the data into a string. * @param buffer The buffer to fill. * @param count The size of the buffer. 
* @param format The format string.
 * @param value1 The first value to add.
 */
/* With USE_OLD_SWPRINTF defined, count is accepted but not passed on to swprintf. */
#ifdef USE_OLD_SWPRINTF
#define FormatString1(buffer, count, format, value1) swprintf(buffer, format, value1)
#else
#define FormatString1(buffer, count, format, value1) swprintf(buffer, count, format, value1)
#endif

/*! Formats the data into a string.
 * Expands to a call to swprintf with two values.
 * With USE_OLD_SWPRINTF defined, count is accepted but not passed on to swprintf.
 * @param buffer The buffer to fill.
 * @param count The size of the buffer.
 * @param format The format string.
 * @param value1 The first value to add.
 * @param value2 The second value to add.
 */
#ifdef USE_OLD_SWPRINTF
#define FormatString2(buffer, count, format, value1, value2) swprintf(buffer, format, value1, value2)
#else
#define FormatString2(buffer, count, format, value1, value2) swprintf(buffer, count, format, value1, value2)
#endif

/*! Formats the data into a string.
 * Expands to a call to swprintf with three values.
 * With USE_OLD_SWPRINTF defined, count is accepted but not passed on to swprintf.
 * @param buffer The buffer to fill.
 * @param count The size of the buffer.
 * @param format The format string.
 * @param value1 The first value to add.
 * @param value2 The second value to add.
 * @param value3 The third value to add.
 */
#ifdef USE_OLD_SWPRINTF
#define FormatString3(buffer, count, format, value1, value2, value3) swprintf(buffer, format, value1, value2, value3)
#else
#define FormatString3(buffer, count, format, value1, value2, value3) swprintf(buffer, count, format, value1, value2, value3)
#endif

/*! Formats the data into a string.
 * Expands to a call to swprintf with four values.
 * With USE_OLD_SWPRINTF defined, count is accepted but not passed on to swprintf.
 * @param buffer The buffer to fill.
 * @param count The size of the buffer.
 * @param format The format string.
 * @param value1 The first value to add.
 * @param value2 The second value to add.
 * @param value3 The third value to add.
 * @param value4 The fourth value to add.
 */
#ifdef USE_OLD_SWPRINTF
#define FormatString4(buffer, count, format, value1, value2, value3, value4) swprintf(buffer, format, value1, value2, value3, value4)
#else
#define FormatString4(buffer, count, format, value1, value2, value3, value4) swprintf(buffer, count, format, value1, value2, value3, value4)
#endif

/*! Formats the data into a string.
* @param buffer The buffer to fill. * @param count The size of the buffer. * @param format The format string. * @param value1 The first value to add. * @param value2 The second value to add. * @param value3 The third value to add. * @param value4 The fourth value to add. * @param value5 The fifth value to add. */ #ifdef USE_OLD_SWPRINTF #define FormatString5(buffer, count, format, value1, value2, value3, value4, value5) swprintf(buffer, format, value1, value2, value3, value4, value5) #else #define FormatString5(buffer, count, format, value1, value2, value3, value4, value5) swprintf(buffer, count, format, value1, value2, value3, value4, value5) #endif #endif //__STRINGUTIL_H__INCLUDED affxparser/src/fusion/calvin_files/utils/src/Subgrids.h0000644000175200017520000000242714516003651024355 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _Subgrids_HEADER_ #define _Subgrids_HEADER_ #include "calvin_files/utils/src/Coords.h" // #include // /*! \file Subgrids.h This file defines a subgrid. */ namespace affymetrix_calvin_utilities { /*! Defines a subgrid */ class Subgrid { public: int32_t row; // ??? int32_t col; // ??? 
FGridCoords pixelgrid; GridCoords cellgrid; //bool operator==( Subgrid& rhs ); }; typedef std::vector SubgridVector; } #endif affxparser/src/fusion/calvin_files/utils/src/checksum.cpp0000644000175200017520000000324414516003651024726 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/utils/src/checksum.h" // using namespace affymetrix_calvin_utilities; /* * Store the one's complement of the checksum in the header of the data. * To validate the data, compute the one's complement of the data. The * resulting checksum should be zero. */ u_int16_t CheckSum::OnesComplementCheckSum(void *addr, size_t size) { u_int16_t *pdata = (u_int16_t *)addr; u_int32_t sum = 0; u_int16_t checksum; // Compute Internet Checksum for "size" bytes // beginning at location "addr". while( size > 1 ) { sum += *pdata++; size -= 2; } // Add left-over byte, if any if( size > 0 ) sum += * (unsigned char *) pdata; // Fold 32-bit sum to 16 bits while (sum>>16) sum = (sum & 0xffff) + (sum >> 16); // Compute the one's complement of the checksum. 
checksum = (u_int16_t) ~sum; return checksum; } affxparser/src/fusion/calvin_files/utils/src/checksum.h0000644000175200017520000000270614516003651024375 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _checksum_HEADER #define _checksum_HEADER /*! \file checksum.h This file provides functions for computing a checksum. */ #include "calvin_files/portability/src/AffymetrixBaseTypes.h" // #include // namespace affymetrix_calvin_utilities { /*! A class to provide checksum functions */ class CheckSum { public: /*! * Computes a one's complement checksum. * * @param addr The memory address of the data. * @param size The number of bytes pointed to by the memory address. * @return The ones compliment checksum. 
*/ static u_int16_t OnesComplementCheckSum(void *addr, size_t size); }; }; #endif affxparser/src/fusion/calvin_files/writers/0000755000175200017520000000000014516003651022165 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/writers/src/0000755000175200017520000000000014516003651022754 5ustar00biocbuildbiocbuildaffxparser/src/fusion/calvin_files/writers/src/ArrayFileWriter.cpp0000644000175200017520000003101514516003651026533 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/ArrayFileWriter.h" // #include "calvin_files/array/src/ArrayId.h" #include "calvin_files/parsers/src/SAXArrayHandlers.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include #include #include #include // #include #include #include #include #include // //#include using namespace affymetrix_calvin_array; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; XERCES_CPP_NAMESPACE_USE #ifdef _MSC_VER #pragma warning(disable: 4996) // ignore deprecated functions warning #endif /*! 
The expected version number */ #define ARRAY_SET_FILE_VERSION_NUMBER "1.0" /*! This class provides utilities for converting native strings to XML strings. * This class is provided for platforms where the XMLCh is not a wchar_t (Mac OSX) */ class XMLChConversion { private: /*! The XML string */ XMLCh *str; /*! converts an int. * @param i The int. */ void convert(int i) { char cstr[64]; sprintf(cstr, "%d", i); convert(cstr); } /*! Converts a 8 bit string. * @param s The 8 bit string. */ void convert(const char *const s) { clear(); int n=(int)strlen(s); str = new XMLCh[n+1]; for (int i=0; icreateElement(ToXMLCh(ARRAY_FILE_ELEMENT)); arrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_TYPE_ATTRIBUTE), ToXMLCh(ARRAY_SET_FILE_TYPE_IDENTIFIER)); arrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_VERSION_ATTRIBUTE), ToXMLCh(ARRAY_SET_FILE_VERSION_NUMBER)); arrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_ID_ATTRIBUTE), ToXMLCh(arrayData.ArraySetFileIdentifier())); arrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_PROJECT_ATTRIBUTE), ToXMLCh(arrayData.InitialProject())); arrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_CREATE_DATE_TIME_ATTRIBUTE), ToXMLCh(arrayData.CreationDateTime())); arrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_CREATED_BY_ATTRIBUTE), ToXMLCh(arrayData.CreatedBy())); std::wstring step = CreateStepToString(arrayData.CreatedStep()); arrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_CREATED_STEP_ATTRIBUTE), ToXMLCh(step)); return arrayElement; } /* * Add the physical arrays to the document if they exist. 
*/ void AddPhysicalArrays(ArrayData &arrayData, DOMDocument* doc, DOMElement* arrayElement) { int nArrays = (int) arrayData.PhysicalArraysAttributes().size(); if (nArrays > 0 ) { DOMElement* physicalArraysElement = doc->createElement(ToXMLCh(PHYSICAL_ARRAYS_ELEMENT)); for (int iArray=0; iArraycreateElement(ToXMLCh(PHYSICAL_ARRAY_ELEMENT)); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_ARRAY_TYPE_ATTRIBUTE), ToXMLCh(ARRAY_TYPE_IDENTIFIER)); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_ID_ATTRIBUTE), ToXMLCh(att.Identifier())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_NAME_ATTRIBUTE), ToXMLCh(att.ArrayName())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_BARCODE_ATTRIBUTE), ToXMLCh(att.ArrayBarcode())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_TYPE_ATTRIBUTE), ToXMLCh(MediaToString(att.Media()))); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_ROW_ATTRIBUTE), ToXMLCh(att.MediaRow())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_COL_ATTRIBUTE), ToXMLCh(att.MediaCol())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_MEDIA_FILE_NAME_ATTRIBUTE), ToXMLCh(att.MediaFileName())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_MEDIA_FILE_GUID_ATTRIBUTE), ToXMLCh(att.MediaFileGUID())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_LIB_PACKAGE_NAME_ATTRIBUTE), ToXMLCh(att.LibraryPackageName())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_MASTERFILE_GUID_ATTRIBUTE), ToXMLCh(att.MasterFileId())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_CREATED_BY_ATTRIBUTE), ToXMLCh(att.CreatedBy())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_CREATION_DATE_ATTRIBUTE), ToXMLCh(att.CreationDateTime())); physicalArrayElement->setAttribute(ToXMLCh(ARRAY_FILE_ELEMENT_CREATED_STEP_ATTRIBUTE), 
ToXMLCh(CreateStepToString(att.CreatedStep()))); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_MASTERFILE_ATTRIBUTE), ToXMLCh(att.MasterFile())); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_PAT_ASSIGNMENT_ATTRIBUTE), ToXMLCh(PATAssignmentMethodToString(att.PatAssignment()))); physicalArrayElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ELEMENT_COMMENT_ATTRIBUTE), ToXMLCh(att.Comment())); int nArraysAttributes = (int) att.Attributes().size(); for (int iArrayAttribute=0; iArrayAttributecreateElement(ToXMLCh(PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT)); paramElement->setAttribute(ToXMLCh(PHYSICAL_ARRAY_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE), ToXMLCh(param.Name)); paramElement->setTextContent(ToXMLCh(param.Value)); physicalArrayElement->appendChild(paramElement); } physicalArraysElement->appendChild(physicalArrayElement); } arrayElement->appendChild(physicalArraysElement); } } /* * Add the user attributes to the document if they exist. */ void AddUserAttributes(ArrayData &arrayData, DOMDocument* doc, DOMElement* arrayElement) { int nUserAttributes = (int) arrayData.UserAttributes().size(); if (nUserAttributes > 0) { DOMElement* userAttributesElement = doc->createElement(ToXMLCh(USER_ATTRIBUTES_ELEMENT)); affymetrix_calvin_parameter::ParameterNameValueDefaultRequiredTypeList::iterator paramIt; for (paramIt=arrayData.UserAttributes().begin(); paramIt!=arrayData.UserAttributes().end(); ++ paramIt) { ParameterNameValueDefaultRequiredType ¶m = *paramIt; DOMElement* paramElement = doc->createElement(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_ELEMENT)); paramElement->setAttribute(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE), ToXMLCh(param.GetName())); paramElement->setAttribute(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_TYPE_ATTRIBUTE), ToXMLCh( ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(param.ValueType()))); if (param.RequiredFlag() == true) 
paramElement->setAttribute(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_REQUIRED_ATTRIBUTE), ToXMLCh("true")); if (param.HasDefault() == true) paramElement->setAttribute(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_DEFAULT_ATTRIBUTE), ToXMLCh(param.DefaultToString())); if (param.ValueType() == ParameterNameValueDefaultRequiredType::ControlMultiParameterType) { std::list::iterator valIt; for (valIt = param.ControlMultiValues().begin(); valIt != param.ControlMultiValues().end(); ++valIt) { DOMElement *valueElement = doc->createElement(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT)); valueElement->setTextContent(ToXMLCh(*valIt)); paramElement->appendChild(valueElement); } } else { DOMElement *valueElement = doc->createElement(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT)); valueElement->setTextContent(ToXMLCh(param.ToString())); paramElement->appendChild(valueElement); } int nControl = (int) param.ControlledVocabulary().size(); if (nControl > 0) { std::list::iterator controlIt; for (controlIt=param.ControlledVocabulary().begin(); controlIt!=param.ControlledVocabulary().end(); ++controlIt) { DOMElement* controlElement = doc->createElement(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT)); controlElement->setAttribute(ToXMLCh(USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT_VALUE_ATTRIBUTE), ToXMLCh(*controlIt)); paramElement->appendChild(controlElement); } } userAttributesElement->appendChild(paramElement); } arrayElement->appendChild(userAttributesElement); } } /* * Initialize the class. */ ArrayFileWriter::ArrayFileWriter() { dataTypeIdentifier = ARRAY_FILE_TYPE_IDENTIFIER; } /* * Clear the data. */ ArrayFileWriter::~ArrayFileWriter() { } /* * Write the entire file, the header and body. */ bool ArrayFileWriter::Write(const std::string &fileName, affymetrix_calvin_array::ArrayData &arrayData) { // Initialize the XML4C2 system. 
try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } // Create a DOM implementation object and create the document type for it. DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(ToXMLCh(L"LS")); //DOMDocumentType* dt = impl->createDocumentType(ToXMLCh(ARRAY_FILE_ELEMENT), 0, ToXMLCh(ARRAY_FILE_DTD)); DOMDocument* doc = impl->createDocument(); //doc->setStandalone(true); //doc->appendChild(dt); // Create the serializer. DOMLSSerializer *theSerializer = ((DOMImplementationLS*)impl)->createLSSerializer(); DOMLSOutput *theOutputDesc = ((DOMImplementationLS*)impl)->createLSOutput(); //theSerializer->setEncoding(ToXMLCh(ARRAY_FILE_ENCODING)); theOutputDesc->setEncoding(ToXMLCh(ARRAY_FILE_ENCODING)); // ArrayFile element DOMElement* arrayElement = CreateArrayElement(arrayData, doc, dataTypeIdentifier); // PhysicalArrays element AddPhysicalArrays(arrayData, doc, arrayElement); // UserAttributes element AddUserAttributes(arrayData, doc, arrayElement); // Add the array element to the document. doc->appendChild(arrayElement); // Write the file. bool status = false; XMLFormatTarget *myFormTarget = new LocalFileFormatTarget(fileName.c_str()); theOutputDesc->setByteStream(myFormTarget); try { theSerializer->write(doc, theOutputDesc); status = true; } catch (...) { status = false; } // Clean up doc->release(); theOutputDesc->release(); theSerializer->release(); delete myFormTarget; XMLPlatformUtils::Terminate(); return status; } affxparser/src/fusion/calvin_files/writers/src/ArrayFileWriter.h0000644000175200017520000000364314516003651026206 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _ArrayFileWriter_HEADER_ #define _ArrayFileWriter_HEADER_ /*! \file ArrayFileWriter.h This file provides interfaces to write an array file. */ #include "calvin_files/array/src/ArrayData.h" #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/writers/src/FileWriteException.h" // #include #include #include // namespace affymetrix_calvin_io { /*! This class provides interfaces to write an array file. */ class ArrayFileWriter { public: /*! Constructor */ ArrayFileWriter(); /*! Destructor */ ~ArrayFileWriter(); protected: /*! An identifier to the type of data stored in the file */ affymetrix_calvin_utilities::AffymetrixGuidType dataTypeIdentifier; public: /*! Writes the array object to an array file. * * @param fileName The name of the array file to write. * @param arrayData The array data to write to the file. * @return True if the file was successfully written. */ bool Write(const std::string &fileName, affymetrix_calvin_array::ArrayData &arrayData); }; }; #endif // _ArrayFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/AuditFileWriter.cpp0000644000175200017520000000571314516003651026531 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/AuditFileWriter.h" // #include "calvin_files/parsers/src/AuditFileConstants.h" #include "calvin_files/utils/src/StringUtils.h" // #include // using namespace affymetrix_calvin_array; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_io; using namespace std; /* * Initialize the class. */ AuditFileWriter::AuditFileWriter() { } /* * Clear the data. */ AuditFileWriter::~AuditFileWriter() { } /* * Write the entry members in a tag=value style. Enclose the entry in a #Entry tag. 
*/ bool AuditFileWriter::Write(const std::string &fileName, affymetrix_calvin_array::ArrayAuditEntry &auditData) { ofstream auditFile(fileName.c_str(), ios::out | ios::app); if (!auditFile) return false; auditFile << AUDIT_START_TAG << endl; auditFile << AUDIT_USER_TAG << "=" << StringUtils::ConvertWCSToMBS(auditData.UserName()) << endl; auditFile << AUDIT_DATE_TAG << "=" << StringUtils::ConvertWCSToMBS(auditData.DateTime().Date()) << endl; auditFile << AUDIT_TIME_TAG << "=" << StringUtils::ConvertWCSToMBS(auditData.DateTime().Time()) << endl; auditFile << AUDIT_ACTION_TAG << "=" << auditData.ActionType() << endl; auditFile << AUDIT_ARRAY_ID_TAG << "=" << auditData.ArrayGuid() << endl; for (AffymetrixGuidTypeList::iterator it = auditData.InputFileGuids().begin(); it != auditData.InputFileGuids().end(); ++it) { AffymetrixGuidType guid = (*it); auditFile << AUDIT_INPUT_ID_TAG << "=" << guid << endl; } for (AffymetrixGuidTypeList::iterator it = auditData.OutputFileGuids().begin(); it != auditData.OutputFileGuids().end(); ++it) { AffymetrixGuidType guid = (*it); auditFile << AUDIT_OUTPUT_ID_TAG << "=" << guid << endl; } for (ParameterNameValuePairList::iterator it = auditData.ActionParameters().begin(); it != auditData.ActionParameters().end(); ++it) { ParameterNameValuePair param = (*it); auditFile << StringUtils::ConvertWCSToMBS(param.Name) << "=" << StringUtils::ConvertWCSToMBS(param.Value) << endl; } auditFile << AUDIT_END_TAG << endl; auditFile.close(); return (auditFile.fail() == 0); } affxparser/src/fusion/calvin_files/writers/src/AuditFileWriter.h0000644000175200017520000000315114516003651026170 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AuditFileWriter_HEADER_ #define _AuditFileWriter_HEADER_ /*! \file AuditFileWriter.h This file provides interfaces to write an audit file. */ #include "calvin_files/array/src/ArrayAudit.h" // namespace affymetrix_calvin_io { /*! This class provides interfaces to write an audit file. */ class AuditFileWriter { public: /*! Constructor */ AuditFileWriter(); /*! Destructor */ ~AuditFileWriter(); public: /*! Writes the audit object to an audit file. * * @param fileName The name of the audit file to write. * @param auditData The audit data to write to the file. * @return True if the file was successfully written. */ bool Write(const std::string &fileName, affymetrix_calvin_array::ArrayAuditEntry &auditData); }; }; #endif // _AuditFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CDFCntrlFileWriter.cpp0000644000175200017520000000616314516003651027062 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CDFCntrlFileWriter.h" // using namespace affymetrix_calvin_io; CDFCntrlFileWriter::CDFCntrlFileWriter(CDFData &p) { writer = new GenericFileWriter(p.GetFileHeader()); contentsPos = 0; WriteHeaders(); dataGroupWriter = 0; //skip ahead to the position of the first probe set dataSet int32_t probes = p.GetProbeSetCnt(); int32_t size = probes * ((MAX_CDF_PROBE_SET_NAME_LENGTH * 2) + 8); writer->SeekFromCurrentPos(size); // Set the file position of the next DataGroup writer->GetDataGroupWriter(0).UpdateNextDataGroupPos(); } CDFCntrlFileWriter::~CDFCntrlFileWriter() { delete writer; if(dataGroupWriter != 0) delete dataGroupWriter; } void CDFCntrlFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriter* dataGroupWriter = &writer->GetDataGroupWriter(0); dataGroupWriter->WriteHeader(); contentsWriter = &dataGroupWriter->GetDataSetWriter(0); contentsWriter->WriteHeader(); contentsPos = writer->GetFilePos(); } CDFCntrlProbeSetWriter* CDFCntrlFileWriter::GetCntrlProbeSetWriter(const std::wstring& probeSetType) { dataSetHdr.Clear(); dataSetHdr.SetName(probeSetType); ParameterNameValueType t1; t1.SetName(QC_PROBESET_TYPE); t1.SetValueText(probeSetType); dataSetHdr.AddNameValParam(t1); dataSetHdr.AddUShortColumn(L""); dataSetHdr.AddUShortColumn(L""); dataSetHdr.AddUByteColumn(L""); dataSetHdr.AddUByteColumn(L""); dataSetHdr.AddUByteColumn(L""); return new CDFCntrlProbeSetWriter(dataGroupWriter->CreateDataSetWriter(dataSetHdr)); } void CDFCntrlFileWriter::OpenDataGroup(const std::wstring& probeSetType, int dataSetCnt) { DataGroupHeader dataGroup(probeSetType); for(int i = 0; i < dataSetCnt; i++) 
{ DataSetHeader dsh; dataGroup.AddDataSetHdr(dsh); } dataGroupWriter = writer->CreateDataGroupWriter(dataGroup); WriteContentsEntry(probeSetType); dataGroupWriter->WriteHeader(); } void CDFCntrlFileWriter::CloseDataGroup() { dataGroupWriter->Close(); delete dataGroupWriter; dataGroupWriter = 0; } void CDFCntrlFileWriter::WriteContentsEntry(const std::wstring& probeSetType) { u_int32_t currentPos = writer->GetFilePos(); writer->SeekFromBeginPos(contentsPos); contentsWriter->Write(probeSetType, 32); contentsWriter->Write(currentPos); contentsPos = writer->GetFilePos(); writer->SeekFromBeginPos(currentPos); } affxparser/src/fusion/calvin_files/writers/src/CDFCntrlFileWriter.h0000644000175200017520000000407414516003651026526 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFCntrlFileWriter_HEADER_ #define _CDFCntrlFileWriter_HEADER_ #include "calvin_files/data/src/CDFData.h" #include "calvin_files/writers/src/CDFCntrlProbeSetWriter.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { static const std::wstring QC_PROBESET_TYPE = L"QC Probeset Type"; class CDFCntrlFileWriter { private: GenericFileWriter* writer; DataGroupWriter* dataGroupWriter; DataSetWriter* contentsWriter; DataSetHeader dataSetHdr; int32_t contentsPos; int32_t currentCubePos; public: CDFCntrlFileWriter(CDFData &p); ~CDFCntrlFileWriter(); CDFCntrlProbeSetWriter* GetCntrlProbeSetWriter(const std::wstring& probeSetType); /*! Prepares a data group for writing @param probeSetName Probe set name @param dataSetCnt number of data sets to be written */ void OpenDataGroup(const std::wstring& probeSetName, int dataSetCnt); /*! Closes data group after writing */ void CloseDataGroup(); private: void WriteHeaders(); void WriteContentsEntry(const std::wstring& probeSetType); }; } #endif // _CDFCntrlFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CDFCntrlProbeSetWriter.cpp0000644000175200017520000000274114516003651027724 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CDFCntrlProbeSetWriter.h" // using namespace affymetrix_calvin_io; void CDFCntrlProbeSetWriter::WriteHeader() { writer->WriteHeader(); } void CDFCntrlProbeSetWriter::Close() { writer->UpdateNextDataSetOffset(); delete writer; writer = 0; } void CDFCntrlProbeSetWriter::Write(u_int16_t xCoord, u_int16_t yCoord, u_int8_t probeLength, u_int8_t perfectMatchFlag, u_int8_t backgroundProbeFlag) { writer->Write(xCoord); writer->Write(yCoord); writer->Write(probeLength); writer->Write(perfectMatchFlag); writer->Write(backgroundProbeFlag); } affxparser/src/fusion/calvin_files/writers/src/CDFCntrlProbeSetWriter.h0000644000175200017520000000311314516003651027363 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFCntrlProbeSetWriter_HEADER_ #define _CDFCntrlProbeSetWriter_HEADER_ #include "calvin_files/writers/src/DataSetWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { class CDFCntrlProbeSetWriter { public: CDFCntrlProbeSetWriter(DataSetWriter* dpw) { writer = dpw; }; ~CDFCntrlProbeSetWriter() { delete writer; }; private: DataSetWriter* writer; public: void WriteHeader(); void Close(); void Write(u_int16_t xCoord, u_int16_t yCoord, u_int8_t probeLength, u_int8_t perfectMatchFlag, u_int8_t backgroundProbeFlag); }; } #endif // _CDFCntrlProbeSetWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CDFFileWriter.cpp0000644000175200017520000001037014516003651026052 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CDFFileWriter.h" // #include "calvin_files/data/src/CDFData.h" // using namespace affymetrix_calvin_io; CDFFileWriter::CDFFileWriter(CDFData &p) { writer = new GenericFileWriter(p.GetFileHeader()); contentsPos = 0; WriteHeaders(); dataGroupWriter = 0; //skip ahead to the position of the first probe set dataSet int32_t probes = p.GetProbeSetCnt(); int32_t size = probes * ((MAX_CDF_PROBE_SET_NAME_LENGTH * 2) + 8); writer->SeekFromCurrentPos(size); // Set the file position of the next DataGroup writer->GetDataGroupWriter(0).UpdateNextDataGroupPos(); } CDFFileWriter::~CDFFileWriter() { delete writer; if(dataGroupWriter != 0) delete dataGroupWriter; } void CDFFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriter* dataGroupWriter = &writer->GetDataGroupWriter(0); dataGroupWriter->WriteHeader(); contentsWriter = &dataGroupWriter->GetDataSetWriter(0); contentsWriter->WriteHeader(); contentsPos = writer->GetFilePos(); } CDFProbeSetWriter* CDFFileWriter::CreateProbeSetWriter(const std::wstring& xdaBlockName, u_int8_t unitType, u_int8_t direction, u_int32_t atoms, u_int32_t cells, u_int32_t probeSetNumber, u_int8_t cellsPerAtom) { //create data dataSet writer probeSetPlaneHdr.Clear(); probeSetPlaneHdr.SetName(xdaBlockName); ParameterNameValueType t1; t1.SetName(CDF_UNIT_TYPE); t1.SetValueUInt8(unitType); probeSetPlaneHdr.AddNameValParam(t1); ParameterNameValueType t2; t2.SetName(CDF_DIRECTION); t2.SetValueUInt8(direction); probeSetPlaneHdr.AddNameValParam(t2); ParameterNameValueType t3; t3.SetName(CDF_ATOMS); t3.SetValueUInt32(atoms); probeSetPlaneHdr.AddNameValParam(t3); ParameterNameValueType t4; t4.SetName(CDF_CELLS); t4.SetValueUInt32(cells); 
probeSetPlaneHdr.AddNameValParam(t4); ParameterNameValueType t5; t5.SetName(CDF_PROBE_SET_NUMBER); t5.SetValueUInt32(probeSetNumber); probeSetPlaneHdr.AddNameValParam(t5); ParameterNameValueType t6; t6.SetName(CDF_CELLS_PER_ATOM); t6.SetValueUInt8(cellsPerAtom); probeSetPlaneHdr.AddNameValParam(t6); probeSetPlaneHdr.AddUShortColumn(L""); probeSetPlaneHdr.AddUShortColumn(L""); probeSetPlaneHdr.AddUIntColumn(L""); probeSetPlaneHdr.AddUIntColumn(L""); probeSetPlaneHdr.AddByteColumn(L""); probeSetPlaneHdr.AddByteColumn(L""); // Set the number of rows // cells should be the same as atoms*cellPerAtom probeSetPlaneHdr.SetRowCnt(cells); return new CDFProbeSetWriter(dataGroupWriter->CreateDataSetWriter(probeSetPlaneHdr)); } void CDFFileWriter::OpenDataGroup(const std::wstring& probeSetName, int dataSetCnt) { DataGroupHeader dataGroup(probeSetName); for(int i = 0; i < dataSetCnt; i++) { DataSetHeader dsh; dataGroup.AddDataSetHdr(dsh); } dataGroupWriter = writer->CreateDataGroupWriter(dataGroup); WriteContentsEntry(probeSetName); dataGroupWriter->WriteHeader(); } void CDFFileWriter::CloseDataGroup() { dataGroupWriter->Close(); delete dataGroupWriter; dataGroupWriter = 0; } void CDFFileWriter::WriteContentsEntry(const std::wstring& probeSetName) { u_int32_t currentPos = writer->GetFilePos(); writer->SeekFromBeginPos(contentsPos); contentsWriter->Write(probeSetName, MAX_CDF_PROBE_SET_NAME_LENGTH); contentsWriter->Write(currentPos); contentsPos = writer->GetFilePos(); writer->SeekFromBeginPos(currentPos); } affxparser/src/fusion/calvin_files/writers/src/CDFFileWriter.h0000644000175200017520000000431314516003651025517 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFFileWriter_HEADER_ #define _CDFFileWriter_HEADER_ #include "calvin_files/data/src/CDFData.h" #include "calvin_files/writers/src/CDFProbeSetWriter.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { class CDFFileWriter { private: GenericFileWriter* writer; DataGroupWriter* dataGroupWriter; DataSetWriter* contentsWriter; DataSetHeader probeSetPlaneHdr; int32_t contentsPos; int32_t currentCubePos; public: CDFFileWriter(CDFData &p); ~CDFFileWriter(); CDFProbeSetWriter* CreateProbeSetWriter(const std::wstring& xdaBlockName, u_int8_t unitType, u_int8_t direction, u_int32_t atoms, u_int32_t cells, u_int32_t probeSetNumber, u_int8_t cellsPerAtom); /*! Prepares a data group for writing @param probeSetName Probe set name @param dataSetCnt number of data sets to be written */ void OpenDataGroup(const std::wstring& probeSetName, int dataSetCnt); /*! 
Closes data group after writing */ void CloseDataGroup(); private: void WriteHeaders(); void WriteContentsEntry(const std::wstring& probeSetName); }; } #endif // _CDFFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CDFProbeSetWriter.cpp0000644000175200017520000000272114516003651026717 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CDFProbeSetWriter.h" // using namespace affymetrix_calvin_io; void CDFProbeSetWriter::WriteHeader() { writer->WriteHeader(); } void CDFProbeSetWriter::Close() { writer->UpdateNextDataSetOffset(); delete writer; writer = 0; } void CDFProbeSetWriter::Write(u_int16_t xCoord, u_int16_t yCoord, u_int32_t atom, u_int32_t indexPos, int8_t baseProbe, int8_t baseTarget) { writer->Write(xCoord); writer->Write(yCoord); writer->Write(atom); writer->Write(indexPos); writer->Write(baseProbe); writer->Write(baseTarget); } affxparser/src/fusion/calvin_files/writers/src/CDFProbeSetWriter.h0000644000175200017520000000305414516003651026364 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CDFProbeSetWriter_HEADER_ #define _CDFProbeSetWriter_HEADER_ #include "calvin_files/writers/src/DataSetWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { class CDFProbeSetWriter { public: CDFProbeSetWriter(DataSetWriter* dpw) { writer = dpw; }; ~CDFProbeSetWriter() { delete writer; }; private: DataSetWriter* writer; public: void WriteHeader(); void Close(); void Write(u_int16_t xCoord, u_int16_t yCoord, u_int32_t atom, u_int32_t indexPos, int8_t baseProbe, int8_t baseTarget); }; } #endif // _CDFProbeSetWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPFileBufferWriter.cpp0000644000175200017520000000610414516003651030357 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file CalvinCHPFileBufferWriter.cpp * @author David Le * @date Mon May 15 12:09:42 2006 * * @brief Class for writing signals to a buffer before writing to CHP files. */ #include "calvin_files/writers/src/CalvinCHPFileBufferWriter.h" // #include "calvin_files/writers/src/CalvinCHPFileUpdater.h" // using namespace affymetrix_calvin_io; CHPFileBufferWriter::CHPFileBufferWriter() { m_BufferSize = 0; m_MaxBufferSize = MAX_BUFFER_SIZE; } CHPFileBufferWriter::~CHPFileBufferWriter() { FlushBuffer(); Cleanup(); } void CHPFileBufferWriter::Cleanup() { for (int target=0; target *CHPFileNames) { m_CHPFileNames = CHPFileNames; Cleanup(); for (int i=0; isize(); i++) { std::vector entryBuffer; m_TargetEntryBuffers.push_back(entryBuffer); m_TargetEntryRowIndexes.push_back(0); } m_BufferSize = 0; } void CHPFileBufferWriter::WriteGenotypeEntry(int target, CHPGenotypeEntry &entry) { GenotypeBufferEntry bufferEntry; bufferEntry.call = entry.GetCall(); bufferEntry.confidence = entry.GetConfidence(); bufferEntry.RAS1 = entry.GetRAS1(); bufferEntry.RAS2 = entry.GetRAS2(); bufferEntry.aaCall = entry.GetAACall(); bufferEntry.abCall = entry.GetABCall(); bufferEntry.bbCall = entry.GetBBCall(); bufferEntry.noCall = entry.GetNoCall(); m_TargetEntryBuffers[target].push_back(bufferEntry); m_BufferSize += sizeof(GenotypeBufferEntry); if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPFileBufferWriter::FlushBuffer() { if (m_BufferSize > 0) { for 
(int target=0; targetsize(); target++) { CalvinCHPFileUpdater updater; updater.OpenCHPFile((*m_CHPFileNames)[target].c_str()); updater.UpdateGenotypeEntryBuffer(m_TargetEntryRowIndexes[target], m_TargetEntryBuffers[target]); updater.CloseCHPFile(); m_TargetEntryRowIndexes[target] += m_TargetEntryBuffers[target].size(); m_TargetEntryBuffers[target].clear(); } } m_BufferSize = 0; } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPFileBufferWriter.h0000644000175200017520000000525514516003651030032 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file CalvinCHPFileBufferWriter.h * @author David Le * @date Mon May 15 12:09:42 2006 * * @brief Class for writing signals to a buffer before writing to CHP files. */ #ifndef _CHPFILEBUFFERWRITER_HEADER_ #define _CHPFILEBUFFERWRITER_HEADER_ #include "calvin_files/data/src/CHPGenotypeEntry.h" // #include #include #include // #define MAX_BUFFER_SIZE 5242880 // 5 MB namespace affymetrix_calvin_io { class CHPFileBufferWriter { public: class GenotypeBufferEntry { public: u_int8_t call; float confidence; float RAS1; float RAS2; float aaCall; float abCall; float bbCall; float noCall; }; public: /*! 
Constructor */ CHPFileBufferWriter(); /*! Destructor */ ~CHPFileBufferWriter(); /*! Set maximum buffer size */ void SetMaxBufferSize(int MaxBufferSize) { m_MaxBufferSize = MaxBufferSize; } /*! Cleans up memory */ void Cleanup(); /*! Initialize entry buffer writer * @param CHPFileNames Reference to a list of CHP file names. */ void Initialize(std::vector *CHPFileNames); /*! Write an entry to buffer. If the buffer is full, flush it. * @param target Target for the Signal entry. * @param entry CHP genotype entry. */ void WriteGenotypeEntry(int target, CHPGenotypeEntry &entry); /*! Write the content of the buffer to Command Console CHP file. */ void FlushBuffer(); private: // Pointer to list of CHP file names. std::vector *m_CHPFileNames; // List of targets used for storing genotype entries. std::vector< std::vector > m_TargetEntryBuffers; // Buffer for storing genotype entry row indexes. std::vector m_TargetEntryRowIndexes; // Size of the current buffer in bytes. int m_BufferSize; // Maximum size of buffer before it gets flushed int m_MaxBufferSize; }; } #endif // _CHPFILEBUFFERWRITER_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPFileUpdater.cpp0000644000175200017520000010704414516003651027362 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPFileUpdater.h" // #include "calvin_files/parsers/src/CHPMultiDataFileReader.h" #include "calvin_files/writers/src/FileOutput.h" // #include "file/FileIO.h" #include "util/Err.h" #include "util/Util.h" // #include #include // using namespace std; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; /*! The data set index. */ #define ENTRY_DATA_SET 0 /*! The data group index. */ #define ENTRY_DATA_GROUP 0 /*! Probetset name and signal columns. */ #define NAME_COLUMN 0 #define ENTRY_COLUMN 1 /* * Initialize any needed members. */ CalvinCHPFileUpdater::CalvinCHPFileUpdater() : DataSetUpdater() { m_CHPFile = NULL; } /* * Clean up. */ CalvinCHPFileUpdater::~CalvinCHPFileUpdater() { CloseCHPFile(); } void CalvinCHPFileUpdater::Initialize(const char *file) { DataSetUpdater::Initialize(file); dataSetIndexMap.clear(); dataGroupIndexMap.clear(); std::map nameTypeMap; int nnames = sizeof(MultiDataDataSetNames) / sizeof(std::wstring); for (int iname=0; iname &expressionSignalBuffer) { // seek to start of update row (note NAME_COLUMN is 0) SeekToPosition(*m_CHPFile, ENTRY_DATA_GROUP, ENTRY_DATA_SET, row_start, NAME_COLUMN); int iProbeSetNameColumnSize = colsizes[ENTRY_DATA_GROUP][ENTRY_DATA_SET][NAME_COLUMN]; for (int i=0; i<(int)expressionSignalBuffer.size(); i++) { m_CHPFile->seekp(iProbeSetNameColumnSize, std::ios::cur); FileOutput::WriteFloat(*m_CHPFile, expressionSignalBuffer[i]); } } void CalvinCHPFileUpdater::UpdateGenotypeEntry(int row, u_int8_t call, float confidence, float RAS1, float RAS2, float aaCall, float abCall, float bbCall, float noCall) { SeekToPosition(*m_CHPFile, ENTRY_DATA_GROUP, ENTRY_DATA_SET, row, 
ENTRY_COLUMN); FileOutput::WriteUInt8(*m_CHPFile, call); FileOutput::WriteFloat(*m_CHPFile, confidence); FileOutput::WriteFloat(*m_CHPFile, RAS1); FileOutput::WriteFloat(*m_CHPFile, RAS2); FileOutput::WriteFloat(*m_CHPFile, aaCall); FileOutput::WriteFloat(*m_CHPFile, abCall); FileOutput::WriteFloat(*m_CHPFile, bbCall); FileOutput::WriteFloat(*m_CHPFile, noCall); } void CalvinCHPFileUpdater::UpdateGenotypeEntryBuffer(int row_start, const std::vector &genotypeEntryBuffer) { // seek to start of update row (note NAME_COLUMN is 0) SeekToPosition(*m_CHPFile, ENTRY_DATA_GROUP, ENTRY_DATA_SET, row_start, NAME_COLUMN); int iProbeSetNameColumnSize = colsizes[ENTRY_DATA_GROUP][ENTRY_DATA_SET][NAME_COLUMN]; for (int i=0; i<(int)genotypeEntryBuffer.size(); i++) { m_CHPFile->seekp(iProbeSetNameColumnSize, std::ios::cur); FileOutput::WriteUInt8(*m_CHPFile, genotypeEntryBuffer[i].call); FileOutput::WriteFloat(*m_CHPFile, genotypeEntryBuffer[i].confidence); FileOutput::WriteFloat(*m_CHPFile, genotypeEntryBuffer[i].RAS1); FileOutput::WriteFloat(*m_CHPFile, genotypeEntryBuffer[i].RAS2); FileOutput::WriteFloat(*m_CHPFile, genotypeEntryBuffer[i].aaCall); FileOutput::WriteFloat(*m_CHPFile, genotypeEntryBuffer[i].abCall); FileOutput::WriteFloat(*m_CHPFile, genotypeEntryBuffer[i].bbCall); FileOutput::WriteFloat(*m_CHPFile, genotypeEntryBuffer[i].noCall); } } void CalvinCHPFileUpdater::UpdateMultiDataGenotypeEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry) { int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row, ENTRY_COLUMN); FileOutput::WriteUInt8(*m_CHPFile, entry.call); FileOutput::WriteFloat(*m_CHPFile, entry.confidence); UpdateMetrics(entry.metrics); } static int GetMetricBufferSize(const std::vector &metrics) { int bufferSize = 0; int ncols = (int) metrics.size(); for (int icol=0; icol &metrics, char * &pbuffer) { int ncols = (int) metrics.size(); 
for (int icol=0; icol &entryBuffer) { if (entryBuffer.size() == 0) { return; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Create a buffer for writing. int iProbeSetNameColumnSize = colsizes[dgIndex][dsIndex][NAME_COLUMN]; int len = iProbeSetNameColumnSize - sizeof(int); int bufferSize = iProbeSetNameColumnSize + sizeof(entryBuffer[0].call) + sizeof(entryBuffer[0].confidence) + sizeof(entryBuffer[0].force) + sizeof(entryBuffer[0].signalA) + sizeof(entryBuffer[0].signalB) + sizeof(entryBuffer[0].contextA) + sizeof(entryBuffer[0].contextB) + GetMetricBufferSize(entryBuffer[0].metrics); bufferSize *= (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); // Copy the data to the buffer char *pbuffer = buffer; for (int i = 0; i<(int)entryBuffer.size(); i++) { MmSetUInt32_N((uint32_t*)pbuffer, len); pbuffer += sizeof(int); memcpy(pbuffer, entryBuffer[i].name.c_str(), entryBuffer[i].name.length()); pbuffer += len; MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].call); pbuffer += sizeof(uint8_t); MmSetFloat_N((float *)pbuffer, entryBuffer[i].confidence); pbuffer += sizeof(float); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].force); pbuffer += sizeof(uint8_t); MmSetFloat_N((float *)pbuffer, entryBuffer[i].signalA); pbuffer += sizeof(float); MmSetFloat_N((float *)pbuffer, entryBuffer[i].signalB); pbuffer += sizeof(float); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextA); pbuffer += sizeof(uint8_t); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextB); pbuffer += sizeof(uint8_t); CopyMetricToBuffer(entryBuffer[i].metrics, pbuffer); } // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateDmetBiAllelicEntryBuffer(MultiDataType dataType, int rowStart, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) { return; } // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateDmetCopyNumberEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetCopyNumberData &entry) { int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row, ENTRY_COLUMN); FileOutput::WriteInt16(*m_CHPFile, entry.call); FileOutput::WriteFloat(*m_CHPFile, entry.confidence); FileOutput::WriteInt16(*m_CHPFile, entry.force); FileOutput::WriteFloat(*m_CHPFile, entry.estimate); FileOutput::WriteFloat(*m_CHPFile, entry.lower); FileOutput::WriteFloat(*m_CHPFile, entry.upper); UpdateMetrics(entry.metrics); } void CalvinCHPFileUpdater::UpdateDmetCopyNumberEntryBuffer(MultiDataType dataType, int rowStart, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) { return; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Create a buffer for writing. 
int iProbeSetNameColumnSize = colsizes[dgIndex][dsIndex][NAME_COLUMN]; int len = iProbeSetNameColumnSize - sizeof(int); int bufferSize = iProbeSetNameColumnSize + sizeof(entryBuffer[0].call) + sizeof(entryBuffer[0].confidence) + sizeof(entryBuffer[0].force) + sizeof(entryBuffer[0].estimate) + sizeof(entryBuffer[0].lower) + sizeof(entryBuffer[0].upper) + GetMetricBufferSize(entryBuffer[0].metrics); bufferSize *= (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); // Copy the data to the buffer char *pbuffer = buffer; for (int i = 0; i<(int)entryBuffer.size(); i++) { MmSetUInt32_N((uint32_t*)pbuffer, len); pbuffer += sizeof(int); memcpy(pbuffer, entryBuffer[i].name.c_str(), entryBuffer[i].name.length()); pbuffer += len; MmSetUInt16_N((uint16_t*)pbuffer, entryBuffer[i].call); pbuffer += sizeof(uint16_t); MmSetFloat_N((float*)pbuffer, entryBuffer[i].confidence); pbuffer += sizeof(float); MmSetUInt16_N((uint16_t*)pbuffer, entryBuffer[i].force); pbuffer += sizeof(uint16_t); MmSetFloat_N((float*)pbuffer, entryBuffer[i].estimate); pbuffer += sizeof(float); MmSetFloat_N((float*)pbuffer, entryBuffer[i].lower); pbuffer += sizeof(float); MmSetFloat_N((float*)pbuffer, entryBuffer[i].upper); pbuffer += sizeof(float); CopyMetricToBuffer(entryBuffer[i].metrics, pbuffer); } // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateDmetCopyNumberEntryBuffer(MultiDataType dataType, int rowStart, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) { return; } // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateDmetMultiAllelicEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetMultiAllelicData &entry) { int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row, ENTRY_COLUMN); FileOutput::WriteUInt8(*m_CHPFile, entry.call); FileOutput::WriteFloat(*m_CHPFile, entry.confidence); FileOutput::WriteUInt8(*m_CHPFile, entry.force); FileOutput::WriteUInt8(*m_CHPFile, entry.alleleCount); FileOutput::WriteFloat(*m_CHPFile, entry.signalA); FileOutput::WriteFloat(*m_CHPFile, entry.signalB); FileOutput::WriteFloat(*m_CHPFile, entry.signalC); FileOutput::WriteFloat(*m_CHPFile, entry.signalD); FileOutput::WriteFloat(*m_CHPFile, entry.signalE); FileOutput::WriteFloat(*m_CHPFile, entry.signalF); FileOutput::WriteUInt8(*m_CHPFile, entry.contextA); FileOutput::WriteUInt8(*m_CHPFile, entry.contextB); FileOutput::WriteUInt8(*m_CHPFile, entry.contextC); FileOutput::WriteUInt8(*m_CHPFile, entry.contextD); FileOutput::WriteUInt8(*m_CHPFile, entry.contextE); FileOutput::WriteUInt8(*m_CHPFile, entry.contextF); 
UpdateMetrics(entry.metrics); } void CalvinCHPFileUpdater::UpdateDmetMultiAllelicEntryBuffer(MultiDataType dataType, int rowStart, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) { return; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Create a buffer for writing. int iProbeSetNameColumnSize = colsizes[dgIndex][dsIndex][NAME_COLUMN]; int len = iProbeSetNameColumnSize - sizeof(int); int bufferSize = iProbeSetNameColumnSize + sizeof(entryBuffer[0].call) + sizeof(entryBuffer[0].confidence) + sizeof(entryBuffer[0].force) + sizeof(entryBuffer[0].alleleCount) + sizeof(entryBuffer[0].signalA) + sizeof(entryBuffer[0].signalB) + sizeof(entryBuffer[0].signalC) + sizeof(entryBuffer[0].signalD) + sizeof(entryBuffer[0].signalE) + sizeof(entryBuffer[0].signalF) + sizeof(entryBuffer[0].contextA) + sizeof(entryBuffer[0].contextB) + sizeof(entryBuffer[0].contextC) + sizeof(entryBuffer[0].contextD) + sizeof(entryBuffer[0].contextE) + sizeof(entryBuffer[0].contextF) + GetMetricBufferSize(entryBuffer[0].metrics); bufferSize *= (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); // Copy the data to the buffer char *pbuffer = buffer; for (int i = 0; i<(int)entryBuffer.size(); i++) { MmSetUInt32_N((uint32_t*)pbuffer, len); pbuffer += sizeof(int); memcpy(pbuffer, entryBuffer[i].name.c_str(), entryBuffer[i].name.length()); pbuffer += len; MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].call); pbuffer += sizeof(uint8_t); MmSetFloat_N((float*)pbuffer, entryBuffer[i].confidence); pbuffer += sizeof(float); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].force); pbuffer += sizeof(uint8_t); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].alleleCount); pbuffer += sizeof(uint8_t); MmSetFloat_N((float*)pbuffer, entryBuffer[i].signalA); pbuffer += sizeof(float); 
MmSetFloat_N((float*)pbuffer, entryBuffer[i].signalB); pbuffer += sizeof(float); MmSetFloat_N((float*)pbuffer, entryBuffer[i].signalC); pbuffer += sizeof(float); MmSetFloat_N((float*)pbuffer, entryBuffer[i].signalD); pbuffer += sizeof(float); MmSetFloat_N((float*)pbuffer, entryBuffer[i].signalE); pbuffer += sizeof(float); MmSetFloat_N((float*)pbuffer, entryBuffer[i].signalF); pbuffer += sizeof(float); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextA); pbuffer += sizeof(uint8_t); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextB); pbuffer += sizeof(uint8_t); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextC); pbuffer += sizeof(uint8_t); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextD); pbuffer += sizeof(uint8_t); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextE); pbuffer += sizeof(uint8_t); MmSetUInt8((uint8_t*)pbuffer, entryBuffer[i].contextF); pbuffer += sizeof(uint8_t); CopyMetricToBuffer(entryBuffer[i].metrics, pbuffer); } // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateDmetMultiAllelicEntryBuffer(MultiDataType dataType, int rowStart, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataGenotypeEntryBuffer(MultiDataType dataType, int row_start, const std::vector &genotypeEntryBuffer) { if (genotypeEntryBuffer.size() == 0) return; // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, NAME_COLUMN); // Create a buffer for writing. int iProbeSetNameColumnSize = colsizes[dgIndex][dsIndex][NAME_COLUMN]; int len = iProbeSetNameColumnSize - sizeof(int); int bufferSize = iProbeSetNameColumnSize + sizeof(genotypeEntryBuffer[0].call) + sizeof(genotypeEntryBuffer[0].confidence) + GetMetricBufferSize(genotypeEntryBuffer[0].metrics); bufferSize *= (int)genotypeEntryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); // Copy the data to the buffer char *pbuffer = buffer; for (int i=0; i<(int)genotypeEntryBuffer.size(); i++) { MmSetUInt32_N((uint32_t *)pbuffer, len); pbuffer += sizeof(int); memcpy(pbuffer, genotypeEntryBuffer[i].name.c_str(), genotypeEntryBuffer[i].name.length()); pbuffer += len; *pbuffer = genotypeEntryBuffer[i].call; pbuffer += sizeof(char); MmSetFloat_N((float *)pbuffer, genotypeEntryBuffer[i].confidence); pbuffer += sizeof(float); CopyMetricToBuffer(genotypeEntryBuffer[i].metrics, pbuffer); } // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataGenotypeEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &genotypeEntryBuffer) { if (genotypeEntryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)genotypeEntryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)genotypeEntryBuffer.size(); i++) { memcpy(pbuffer, genotypeEntryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, NAME_COLUMN); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataCopyNumberEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector ©NumberEntryBuffer) { if (copyNumberEntryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)copyNumberEntryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)copyNumberEntryBuffer.size(); i++) { memcpy(pbuffer, copyNumberEntryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, NAME_COLUMN); // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataCytoRegionEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &cytoEntryBuffer) { if (cytoEntryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)cytoEntryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)cytoEntryBuffer.size(); i++) { memcpy(pbuffer, cytoEntryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, NAME_COLUMN); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataCopyNumberVariationRegionEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &cnVariationEntryBuffer) { if (cnVariationEntryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)cnVariationEntryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)cnVariationEntryBuffer.size(); i++) { memcpy(pbuffer, cnVariationEntryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, NAME_COLUMN); // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataExpressionEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry) { int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row, ENTRY_COLUMN); FileOutput::WriteFloat(*m_CHPFile, entry.quantification); UpdateMetrics(entry.metrics); } void CalvinCHPFileUpdater::UpdateMultiDataExpressionEntryBuffer(MultiDataType dataType, int row_start, const std::vector &expressionEntryBuffer) { // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, NAME_COLUMN); int iProbeSetNameColumnSize = colsizes[dgIndex][dsIndex][NAME_COLUMN]; for (int i=0; i<(int)expressionEntryBuffer.size(); i++) { m_CHPFile->seekp(iProbeSetNameColumnSize, std::ios::cur); FileOutput::WriteFloat(*m_CHPFile, expressionEntryBuffer[i].quantification); UpdateMetrics(expressionEntryBuffer[i].metrics); } } void CalvinCHPFileUpdater::UpdateMultiDataExpressionEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &expressionEntryBuffer) { if (expressionEntryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)expressionEntryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)expressionEntryBuffer.size(); i++) { memcpy(pbuffer, expressionEntryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, NAME_COLUMN); // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateChromosomeSegmentEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, 0); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateChromosomeSummaryEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, 0); // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateFamilialSegmentOverlapEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, 0); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateFamilialSampleEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) return; // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, row_start, 0); // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataAllelePeaksEntryBuffer(MultiDataType dataType, int rowStart, int bufferEntrySize, const std::vector &allelePeakEntryBuffer) { if (allelePeakEntryBuffer.size() == 0) { return; } // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)allelePeakEntryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)allelePeakEntryBuffer.size(); i++) { memcpy(pbuffer, allelePeakEntryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataMarkerABSignalsEntryBuffer(MultiDataType dataType, int rowStart, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) { return; } // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Write the buffer. 
m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMultiDataCytoGenotypeEntryBuffer(MultiDataType dataType, int rowStart, int bufferEntrySize, const std::vector &entryBuffer) { if (entryBuffer.size() == 0) { return; } // Copy the data to the buffer int bufferSize = bufferEntrySize * (int)entryBuffer.size(); char *buffer = new char[bufferSize]; memset(buffer, 0, bufferSize); char *pbuffer = buffer; for (int i=0; i<(int)entryBuffer.size(); i++) { memcpy(pbuffer, entryBuffer[i], bufferEntrySize); pbuffer += bufferEntrySize; } // seek to start of update row (note NAME_COLUMN is 0) int dsIndex = dataSetIndexMap[dataType]; int dgIndex = dataGroupIndexMap[dataType]; SeekToPosition(*m_CHPFile, dgIndex, dsIndex, rowStart, NAME_COLUMN); // Write the buffer. m_CHPFile->write(buffer, bufferSize); delete[] buffer; buffer = NULL; } void CalvinCHPFileUpdater::UpdateMetrics(const std::vector &metrics) { int ncols = (int) metrics.size(); for (int icol=0; icolis_open() == true) { m_CHPFile->close(); } delete m_CHPFile; m_CHPFile = NULL; } dataSetIndexMap.clear(); dataGroupIndexMap.clear(); } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPFileUpdater.h0000644000175200017520000003576114516003651027035 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file CalvinCHPFileUpdater.h Provides interfaces to update data in a "Calvin" binary "signal" data file. */ #ifndef _CalvinCHPFileUpdater_HEADER_ #define _CalvinCHPFileUpdater_HEADER_ #include "calvin_files/data/src/CHPMultiDataData.h" #include "calvin_files/data/src/ProbeSetMultiDataData.h" #include "calvin_files/writers/src/CalvinCHPFileBufferWriter.h" #include "calvin_files/writers/src/DataSetUpdater.h" // #include #include // namespace affymetrix_calvin_io { /*! Provides interfaces to update data in a "Calvin" binary "signal" data file. * This class assumes that the file has been completely written and that * only existing data groups/sets/rows/cols are being modified. */ class CalvinCHPFileUpdater : public DataSetUpdater { public: /*! Constructor */ CalvinCHPFileUpdater(); /*! Destructor */ ~CalvinCHPFileUpdater(); /*! Initialize the class given a "calvin" data file. * @param file The name of the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Initialize(const char *file); /*! Open CHP signal file and initialize it. * @param fileName The name of the CHP signal file. */ void OpenCHPFile(const char *fileName); /*! Seek to appropriate file position and update expression signal * @param row The row index. * @param quantification The new quantification value. */ void UpdateExpressionQuantification(int row, float quantification); /*! 
Seek to appropriate file position and update expression signal vector * @param row_start The start row for updating. * @param expressionSignalBuffer The vector contain all buffered signals to be updated. */ void UpdateExpressionQuantificationBuffer(int row_start, std::vector &expressionSignalBuffer); /*! Seek to appropriate file position and update genotype entry * @param row The row index. * @param call CHP call representation. * @param confidence CHP confidence value. * @param RAS1 CHP RAS1 value. * @param RAS2 CHP RAS2 value. * @param aaCall CHP aaCall value. * @param abCall CHP abCall value. * @param bbCall CHP bbCall value. * @param noCall CHP noCall value. */ void UpdateGenotypeEntry(int row, u_int8_t call, float confidence, float RAS1, float RAS2, float aaCall, float abCall, float bbCall, float noCall); /*! Seek to appropriate file position and update genotype entry vector * @param row_start The start row for updating. * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateGenotypeEntryBuffer(int row_start, const std::vector &genotypeEntryBuffer); /*! Seek to appropriate file position and update genotype entry * @param dataType The data type. * @param row The row index. * @param call CHP call representation. * @param confidence CHP confidence value. */ void UpdateMultiDataGenotypeEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry); /*! Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateMultiDataGenotypeEntryBuffer(MultiDataType dataType, int row_start, const std::vector &genotypeEntryBuffer); /*! Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. 
* @param bufferEntrySize The size of the buffer for a given element in the vector * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateMultiDataGenotypeEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &genotypeEntryBuffer); /*! Seek to appropriate file position and update copy number entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param copyNumberEntryBuffer The vector contain all buffered entries to be updated. */ void UpdateMultiDataCopyNumberEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector ©NumberEntryBuffer); /*! Seek to appropriate file position and update cyto entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param cytoEntryBuffer The vector contain all buffered entries to be updated. */ void UpdateMultiDataCytoRegionEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &cytoEntryBuffer); /*! Seek to appropriate file position and update cyto entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param cytoEntryBuffer The vector contain all buffered entries to be updated. */ void UpdateMultiDataCopyNumberVariationRegionEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &cnVariationEntryBuffer); /*! Seek to appropriate file position and update expression entry * @param dataType The data type. * @param row The row index. * @param call CHP call representation. * @param confidence CHP confidence value. 
*/ void UpdateMultiDataExpressionEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry); /*! Seek to appropriate file position and update expression entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param expressionBufferEntry The vector contain all buffered entries to be updated. */ void UpdateMultiDataExpressionEntryBuffer(MultiDataType dataType, int row_start, const std::vector &expressionEntryBuffer); /*! Seek to appropriate file position and update expression entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param expressionBufferEntry The vector contain all buffered entries to be updated. */ void UpdateMultiDataExpressionEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &expressionEntryBuffer); /*! Seek to appropriate file position and update genotype entry * @param dataType The data type. * @param row The row index. * @param call CHP call representation. * @param confidence CHP confidence value. */ void UpdateDmetBiAllelicEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetBiAllelicData &entry); /*! Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateDmetBiAllelicEntryBuffer(MultiDataType dataType, int row_start, const std::vector &entryBuffer); /*! Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param genotypeBufferEntry The vector contain all buffered entries to be updated. 
*/ void UpdateDmetBiAllelicEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update genotype entry * @param dataType The data type. * @param row The row index. * @param call CHP call representation. * @param confidence CHP confidence value. */ void UpdateDmetCopyNumberEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetCopyNumberData &entry); /*! Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateDmetCopyNumberEntryBuffer(MultiDataType dataType, int row_start, const std::vector &entryBuffer); /*! Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateDmetCopyNumberEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update genotype entry * @param dataType The data type. * @param row The row index. * @param call CHP call representation. * @param confidence CHP confidence value. */ void UpdateDmetMultiAllelicEntry(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetMultiAllelicData &entry); /*! Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateDmetMultiAllelicEntryBuffer(MultiDataType dataType, int row_start, const std::vector &entryBuffer); /*! 
Seek to appropriate file position and update genotype entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param genotypeBufferEntry The vector contain all buffered entries to be updated. */ void UpdateDmetMultiAllelicEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update the segment entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param entryBuffer The vector contain all buffered entries to be updated. */ void UpdateChromosomeSegmentEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update the summary entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param entryBuffer The vector contain all buffered entries to be updated. */ void UpdateChromosomeSummaryEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update the summary entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param entryBuffer The vector contain all buffered entries to be updated. */ void UpdateFamilialSegmentOverlapEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update the sample entry vector * @param dataType The data type. * @param row_start The start row for updating. 
* @param bufferEntrySize The size of the buffer for a given element in the vector * @param entryBuffer The vector contain all buffered entries to be updated. */ void UpdateFamilialSampleEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update allele peak entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param allelePeakEntryBuffer The vector contain all buffered entries to be updated. */ void UpdateMultiDataAllelePeaksEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &allelePeakEntryBuffer); /*! Seek to appropriate file position and update AB signal entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param entryBuffer The vector contain all buffered entries to be updated. */ void UpdateMultiDataMarkerABSignalsEntryBuffer(MultiDataType dataType, int row_start, int bufferEntrySize, const std::vector &entryBuffer); /*! Seek to appropriate file position and update Cyto genotyping call entry vector * @param dataType The data type. * @param row_start The start row for updating. * @param bufferEntrySize The size of the buffer for a given element in the vector * @param entryBuffer The vector contain all buffered entries to be updated. */ void UpdateMultiDataCytoGenotypeEntryBuffer(MultiDataType dataType, int rowStart, int bufferEntrySize, const std::vector &entryBuffer); /*! Close CHP signal file. */ void CloseCHPFile(); private: // CHP signal file std::ofstream *m_CHPFile; /*! Map of data type to index. */ std::map dataSetIndexMap; /*! Map of data type to data group index. */ std::map dataGroupIndexMap; /*! 
Update the metrics */ void UpdateMetrics(const std::vector &metrics); }; } #endif // _CalvinCHPFileUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPFileWriter.cpp0000644000175200017520000001105014516003651027221 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPFileWriter.h" // #include "calvin_files/data/src/CHPData.h" // using namespace affymetrix_calvin_io; CHPFileWriter::CHPFileWriter(CHPData &p) { writer = new GenericFileWriter(p.GetFileHeader()); entryPos = 0; bgZonePos = 0; forcePos = 0; origPos = 0; maxProbeSetName = p.GetMaxProbeSetName(); WriteHeaders(); } CHPFileWriter::~CHPFileWriter() { delete writer; } void CHPFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriterIt beginGrp; DataGroupWriterIt endGrp; writer->GetDataGroupWriters(beginGrp, endGrp); while(beginGrp != endGrp) { DataGroupWriter* dataGroupWriter = &(*beginGrp); dataGroupWriter->WriteHeader(); DataSetWriterIt beginSet; DataSetWriterIt endSet; dataGroupWriter->GetDataSetWriters(beginSet, endSet); while(beginSet != endSet) { dataSetWriter = &(*beginSet); beginSet->WriteHeader(); SetFilePositions(); 
beginSet++; } dataGroupWriter->UpdateNextDataGroupPos(); beginGrp++; } } void CHPFileWriter::SeekToDataSet() { writer->SeekFromBeginPos(entryPos); } void CHPFileWriter::WriteExpressionEntry(const CHPExpressionEntry& p) { dataSetWriter->Write(p.GetProbeSetName(), maxProbeSetName); dataSetWriter->Write(p.GetDetection()); dataSetWriter->Write(p.GetDetectionPValue()); dataSetWriter->Write(p.GetSignal()); dataSetWriter->Write(p.GetNumPairs()); dataSetWriter->Write(p.GetNumPairsUsed()); if (p.GetHasComparisonData()) { dataSetWriter->Write(p.GetChange()); dataSetWriter->Write(p.GetChangePValue()); dataSetWriter->Write(p.GetSigLogRatio()); dataSetWriter->Write(p.GetSigLogRatioLo()); dataSetWriter->Write(p.GetSigLogRatioHi()); dataSetWriter->Write(p.GetCommonPairs()); } } void CHPFileWriter::WriteGenotypeEntry(const CHPGenotypeEntry& p) { dataSetWriter->Write(p.GetProbeSetName(), maxProbeSetName); dataSetWriter->Write(p.GetCall()); dataSetWriter->Write(p.GetConfidence()); dataSetWriter->Write(p.GetRAS1()); dataSetWriter->Write(p.GetRAS2()); dataSetWriter->Write(p.GetAACall()); dataSetWriter->Write(p.GetABCall()); dataSetWriter->Write(p.GetBBCall()); dataSetWriter->Write(p.GetNoCall()); } void CHPFileWriter::WriteUniversalEntry(const CHPUniversalEntry& p) { dataSetWriter->Write(p.GetBackground()); } void CHPFileWriter::WriteReseqEntry(const CHPReseqEntry& p) { dataSetWriter->Write((int8_t)p.call); dataSetWriter->Write(p.score); } void CHPFileWriter::SeekToBgSet() { writer->SeekFromBeginPos(bgZonePos); } void CHPFileWriter::WriteBackgroundZone(const CHPBackgroundZone& zone) { dataSetWriter->Write(zone.GetCenterX()); dataSetWriter->Write(zone.GetCenterY()); dataSetWriter->Write(zone.GetBackground()); dataSetWriter->Write(zone.GetSmoothFactor()); } void CHPFileWriter::SeekToForceSet() { writer->SeekFromBeginPos(forcePos); } void CHPFileWriter::WriteForceCall(const CHPReseqForceCall& force) { dataSetWriter->Write(force.position); dataSetWriter->Write((int8_t)force.call); 
dataSetWriter->Write((int8_t)force.reason); } void CHPFileWriter::SeekToOrigCallSet() { writer->SeekFromBeginPos(origPos); } void CHPFileWriter::WriteOrigCall(const CHPReseqOrigCall& orig) { dataSetWriter->Write(orig.position); dataSetWriter->Write((int8_t)orig.call); } void CHPFileWriter::SetFilePositions() { const std::wstring &name = dataSetWriter->GetDataSetName(); int32_t dataSetSz = dataSetWriter->GetDataSetSize(); if(name == CHP_BG_ZONE_GROUP) { bgZonePos = writer->GetFilePos(); } else if (name == CHP_RESEQ_FORCE_CALL_GROUP) { forcePos = writer->GetFilePos(); } else if (name == CHP_RESEQ_ORIG_CALL_GROUP) { origPos = writer->GetFilePos(); } else { entryPos = writer->GetFilePos(); } writer->SeekFromCurrentPos(dataSetSz + 1); dataSetWriter->UpdateNextDataSetOffset(); } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPFileWriter.h0000644000175200017520000000674714516003651026707 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPFileWriter_HEADER_ #define _CHPFileWriter_HEADER_ /*! \file CalvinCHPFileWriter.h Provides classes to write CHP files with expression, genotyping, tag and resequencing data. 
*/

#include "calvin_files/data/src/CHPBackgroundZone.h"
#include "calvin_files/data/src/CHPData.h"
#include "calvin_files/data/src/CHPExpressionEntry.h"
#include "calvin_files/data/src/CHPGenotypeEntry.h"
#include "calvin_files/data/src/CHPReseqEntry.h"
#include "calvin_files/data/src/CHPUniversalEntry.h"
#include "calvin_files/writers/src/GenericFileWriter.h"
//
// NOTE(review): the header name of the following #include is missing in this
// copy of the file; restore it from the upstream source.
#include
//

namespace affymetrix_calvin_io
{

/*! Provides write capabilities for expression, genotyping, resequencing and tag arrays.
 *  The constructor writes all headers; callers then seek to a data set with
 *  one of the SeekTo* methods and write its rows with the matching Write* method.
 */
class CHPFileWriter
{
private:
    /*! The maximum length of a probe set name, passed along with every probe set name that is written. */
    int maxProbeSetName;

    /*! The file writer. Created in the constructor and deleted in the destructor. */
    GenericFileWriter* writer;

    /*! The data set writer used by all Write* methods. Assigned while the headers are written. */
    DataSetWriter* dataSetWriter;

    /*! The file position of the entry data set (any data set that is not one of the three below). */
    int32_t entryPos;

    /*! The file position of the background zone data set. */
    int32_t bgZonePos;

    /*! The file position of the force call data set. */
    int32_t forcePos;

    /*! The file position of the original call data set. */
    int32_t origPos;

public:

    /*! Constructor. Creates the file writer and writes the headers.
     * @param p The CHP file object.
     */
    CHPFileWriter(CHPData& p);

    /*! Destructor. Deletes the file writer. */
    ~CHPFileWriter();

    /*! Seeks to the start of the entry data set. */
    void SeekToDataSet();

    /*! Writes an expression result to the file.
     * @param p The result to write.
     */
    void WriteExpressionEntry(const CHPExpressionEntry& p);

    /*! Writes a genotype result to the file.
     * @param p The result to write.
     */
    void WriteGenotypeEntry(const CHPGenotypeEntry& p);

    /*! Writes a universal (tag) result to the file.
     * @param p The result to write.
     */
    void WriteUniversalEntry(const CHPUniversalEntry& p);

    /*! Writes a resequencing result to the file.
     * @param p The result to write.
     */
    void WriteReseqEntry(const CHPReseqEntry& p);

    /*! Seeks to the start of the background zone data set. */
    void SeekToBgSet();

    /*! Writes a background zone to the file.
     * @param zone The result to write.
     */
    void WriteBackgroundZone(const CHPBackgroundZone& zone);

    /*! Seeks to the start of the force call data set. */
    void SeekToForceSet();

    /*! Writes a force call to the file.
     * @param force The result to write.
     */
    void WriteForceCall(const CHPReseqForceCall& force);

    /*! Seeks to the start of the original call data set. */
    void SeekToOrigCallSet();

    /*! Writes original call to the file.
     * @param orig The result to write.
     */
    void WriteOrigCall(const CHPReseqOrigCall& orig);

private:

    /*! Writes the file, data group and data set headers to the file. */
    void WriteHeaders();

    /*! Updates the file positions after a data set is added to the file. */
    void SetFilePositions();
};

}

#endif // _CHPFileWriter_HEADER_
affxparser/src/fusion/calvin_files/writers/src/CalvinCHPMultiDataFileBufferWriter.cpp0000644000175200017520000011766214516003651032200 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2007 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

/**
 * @file CalvinCHPMultiDataFileBufferWriter.cpp
 * @brief Class for writing MultiDatas to a buffer before writing to CHP files.
 */

#ifdef _MSC_VER
#pragma warning(disable: 4996) // don't show deprecated warnings.
#else
// NOTE(review): the header names of the #include lines in this section are
// missing in this copy of the file, and so are the template arguments of the
// std::vector / std::map types below. The code is left exactly as found;
// restore both from the upstream source.
#include
#include
#include
#endif
//
#include "calvin_files/writers/src/CalvinCHPMultiDataFileBufferWriter.h"
//
#include "calvin_files/writers/src/CalvinCHPFileUpdater.h"
#include "calvin_files/writers/src/FileOutput.h"
//
#include "file/FileIO.h"
//
#include
#include
#include
//

using namespace std;
using namespace affymetrix_calvin_io;
using namespace affymetrix_calvin_data;
using namespace affymetrix_calvin_parameter;

/*! Starts with an empty buffer and the default size limit (MAX_BUFFER_SIZE). */
CHPMultiDataFileBufferWriter::CHPMultiDataFileBufferWriter()
{
    m_BufferSize = 0;
    m_MaxBufferSize = MAX_BUFFER_SIZE;
}

/*! Calls FlushBuffer() and then Cleanup(), in that order. */
CHPMultiDataFileBufferWriter::~CHPMultiDataFileBufferWriter()
{
    FlushBuffer();
    Cleanup();
}

/*! Frees every row buffer of every target with delete[], then empties the
 *  per-target lists, the outer list and the row index list.
 * @param buffers The per-target lists of row buffers.
 * @param rowIndexes The row indexes that belong to the same data type.
 */
static void ClearBuffer(std::vector< std::vector > &buffers, std::vector &rowIndexes)
{
    for (int target=0; target<(int)buffers.size(); target++)
    {
        int nbufs = (int) buffers[target].size();
        for (int ibuf=0; ibuf < nbufs; ibuf++)
        {
            delete[] buffers[target][ibuf];
            buffers[target][ibuf] = NULL;
        }
        buffers[target].clear();
    }
    buffers.clear();
    rowIndexes.clear();
}

/*! Releases all buffered rows and forgets all per-data-type state. */
void CHPMultiDataFileBufferWriter::Cleanup()
{
    std::map > >::iterator it;
    std::map >::iterator rit;
    // The two maps are walked in lockstep; the loop ends as soon as either
    // one is exhausted.
    for (rit = rowIndexes.begin(), it = dataBuffers.begin(); rit != rowIndexes.end() && it != dataBuffers.end(); rit++, it++)
    {
        ClearBuffer(it->second, rit->second);
    }
    rowIndexes.clear();
    dataBuffers.clear();
    dataBufferSz.clear();
}

/*! Convenience overload: only probe set name lengths are supplied. Empty maps
 *  are passed for all segment and familial lengths.
 */
void CHPMultiDataFileBufferWriter::Initialize(vector *CHPFileNames, std::vector &dataTypes, std::map &maxProbeSetNmLn)
{
    std::map empty;
    Initialize(CHPFileNames, dataTypes, maxProbeSetNmLn, empty, empty, empty, empty, empty, empty, empty);
}

/*! Convenience overload: only segment and familial lengths are supplied. An
 *  empty map is passed for the probe set name lengths.
 */
void CHPMultiDataFileBufferWriter::Initialize(vector *CHPFileNames, vector &dataTypes, map &maxSegmentTypeLn, map &maxReferenceSegmentIDLn, map &maxFamilialSegmentIDLn, map &maxFamilialARRIDLn, map &maxFamilialCHPIDLn, map &maxFamilialCHPFilenameLn, map &maxFamilialRoleLn )
{
    std::map empty;
    Initialize(CHPFileNames, dataTypes, empty, maxSegmentTypeLn, maxReferenceSegmentIDLn, maxFamilialSegmentIDLn, maxFamilialARRIDLn, maxFamilialCHPIDLn, maxFamilialCHPFilenameLn, maxFamilialRoleLn);
}

/*! Stores the file name list pointer and the length maps, discards any
 *  previously buffered data and sets up fresh buffers.
 *  The file name list is kept by pointer, not copied.
 */
void CHPMultiDataFileBufferWriter::Initialize
(
    std::vector *CHPFileNames,
    std::vector &dataTypes,
    std::map &maxProbeSetNmLn,
    std::map &maxSegmentTypeLn,
    std::map &maxReferenceSegmentIDLn,
    std::map &maxFamilialSegmentIDLn,
    std::map &maxFamilialARRIDLn,
    std::map &maxFamilialCHPIDLn,
    std::map &maxFamilialCHPFilenameLn,
    std::map &maxFamilialRoleLn
)
{
    chpFileNames = CHPFileNames;
    maxSegmentType = maxSegmentTypeLn;
    maxReferenceSegmentID = maxReferenceSegmentIDLn;
    maxFamilialSegmentID = maxFamilialSegmentIDLn;
    maxFamilialARRID = maxFamilialARRIDLn;
    maxFamilialCHPID = maxFamilialCHPIDLn;
    maxFamilialCHPFilename = maxFamilialCHPFilenameLn;
    maxFamilialRole = maxFamilialRoleLn;
    maxProbeSetNameLength = maxProbeSetNmLn;
    Cleanup();
    // One empty row list and one row index of 0 per (CHP file, data type).
    for (int i=0; i<(int)chpFileNames->size(); i++)
    {
        for (int itype=0; itype<(int)dataTypes.size(); itype++)
        {
            vector buffer;
            dataBuffers[dataTypes[itype]].push_back(buffer);
            rowIndexes[dataTypes[itype]].push_back(0);
        }
    }
    m_BufferSize = 0;
    // A row size of 0 marks the size as not yet computed; the WriteEntry
    // functions fill it in when they see it is still 0.
    for (int i=0; i<(int)dataTypes.size(); i++)
    {
        dataBufferSz[dataTypes[i]] = 0;
    }
}

// static to keep function local to this file - perhaps this should be a static private member function?
-AW static int GetMetricBufferSize(const std::vector &metrics) { int bufferSize = 0; int ncols = (int) metrics.size(); for (int icol=0; icol &metrics, char * &pbuffer) { int ncols = (int) metrics.size(); for (int icol=0; icol m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteMultiDataExpressionEntry(MultiDataType dataType, int target, const ProbeSetMultiDataExpressionData &entry) { if (dataBufferSz[dataType] == 0) dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.quantification) + GetMetricBufferSize(entry.metrics); char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetFloat_N((float *)pbuffer, entry.quantification); pbuffer += sizeof(float); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteMultiDataCopyNumberEntry(MultiDataType dataType, int target, const ProbeSetMultiDataCopyNumberData &entry) { if (dataBufferSz[dataType] == 0) dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.chr) + sizeof(entry.position) + GetMetricBufferSize(entry.metrics); char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetUInt8((uint8_t *)pbuffer, entry.chr); pbuffer += sizeof(entry.chr); MmSetUInt32_N((uint32_t *)pbuffer, entry.position); pbuffer += sizeof(entry.position); CopyMetricToBuffer(entry.metrics, pbuffer); 
dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const AllelePeaks &entry) { if (dataBufferSz[dataType] == 0) dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.chr) + sizeof(entry.position) + GetMetricBufferSize(entry.peaks); char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetUInt8((uint8_t *)pbuffer, entry.chr); pbuffer += sizeof(entry.chr); MmSetUInt32_N((uint32_t *)pbuffer, entry.position); pbuffer += sizeof(entry.position); CopyMetricToBuffer(entry.peaks, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const MarkerABSignals &entry) { if (dataBufferSz[dataType] == 0) dataBufferSz[dataType] = sizeof(entry.index) + GetMetricBufferSize(entry.metrics); char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.index); pbuffer += sizeof(int); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const CytoGenotypeCallData &entry) { if (dataBufferSz[dataType] == 0) dataBufferSz[dataType] = sizeof(entry.index) + sizeof(entry.call) + sizeof(entry.confidence) + sizeof(entry.forcedCall) + sizeof(entry.aSignal) + 
sizeof(entry.bSignal) + sizeof (entry.signalStrength) + sizeof(entry.contrast) + GetMetricBufferSize(entry.metrics); char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.index); pbuffer += sizeof(entry.index); MmSetUInt8((uint8_t *)pbuffer, entry.call); pbuffer += sizeof(entry.call); MmSetFloat_N((float *)pbuffer, entry.confidence); pbuffer += sizeof(entry.confidence); MmSetUInt8((uint8_t *)pbuffer, entry.forcedCall); pbuffer += sizeof(entry.forcedCall); MmSetFloat_N((float *)pbuffer, entry.aSignal); pbuffer += sizeof(entry.confidence); MmSetFloat_N((float *)pbuffer, entry.bSignal); pbuffer += sizeof(entry.confidence); MmSetFloat_N((float *)pbuffer, entry.signalStrength); pbuffer += sizeof(entry.confidence); MmSetFloat_N((float *)pbuffer, entry.contrast); pbuffer += sizeof(entry.confidence); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteMultiDataCytoRegionEntry(MultiDataType dataType, int target, const ProbeSetMultiDataCytoRegionData &entry) { if (dataBufferSz[dataType] == 0) dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.chr) + sizeof(entry.startPosition) + sizeof(entry.stopPosition) + sizeof(entry.call) + sizeof(entry.confidenceScore) + GetMetricBufferSize(entry.metrics); char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetUInt8((uint8_t *)pbuffer, entry.chr); pbuffer += sizeof(entry.chr); MmSetUInt32_N((uint32_t *)pbuffer, entry.startPosition); pbuffer += sizeof(entry.startPosition); 
MmSetUInt32_N((uint32_t *)pbuffer, entry.stopPosition); pbuffer += sizeof(entry.stopPosition); MmSetUInt8((uint8_t *)pbuffer, entry.call); pbuffer += sizeof(entry.call); MmSetFloat_N((float *)pbuffer, entry.confidenceScore); pbuffer += sizeof(entry.confidenceScore); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteMultiDataCopyNumberVariationRegionEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry) { if (dataBufferSz[dataType] == 0) dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.signal) + sizeof(entry.call) + sizeof(entry.confidenceScore) + GetMetricBufferSize(entry.metrics); char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetFloat_N((float *)pbuffer, entry.signal); pbuffer += sizeof(entry.signal); MmSetUInt8((uint8_t *)pbuffer, entry.call); pbuffer += sizeof(entry.call); MmSetFloat_N((float *)pbuffer, entry.confidenceScore); pbuffer += sizeof(entry.confidenceScore); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const DmetBiAllelicData &entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.call) + sizeof(entry.confidence) + sizeof(entry.force) + sizeof(entry.signalA) + sizeof(entry.signalB) + sizeof(entry.contextA) + 
sizeof(entry.contextB) + GetMetricBufferSize(entry.metrics); } char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t*)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetUInt8((uint8_t*)pbuffer, entry.call); pbuffer += sizeof(entry.call); MmSetFloat_N((float*)pbuffer, entry.confidence); pbuffer += sizeof(entry.confidence); MmSetUInt8((uint8_t*)pbuffer, entry.force); pbuffer += sizeof(entry.force); MmSetFloat_N((float*)pbuffer, entry.signalA); pbuffer += sizeof(entry.signalA); MmSetFloat_N((float*)pbuffer, entry.signalB); pbuffer += sizeof(entry.signalB); MmSetUInt8((uint8_t*)pbuffer, entry.contextA); pbuffer += sizeof(entry.contextA); MmSetUInt8((uint8_t*)pbuffer, entry.contextB); pbuffer += sizeof(entry.contextB); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const DmetMultiAllelicData &entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.call) + sizeof(entry.confidence) + sizeof(entry.force) + sizeof(entry.alleleCount) + sizeof(entry.signalA) + sizeof(entry.signalB) + sizeof(entry.signalC) + sizeof(entry.signalD) + sizeof(entry.signalE) + sizeof(entry.signalF) + sizeof(entry.contextA) + sizeof(entry.contextB) + sizeof(entry.contextC) + sizeof(entry.contextD) + sizeof(entry.contextE) + sizeof(entry.contextF) + GetMetricBufferSize(entry.metrics); } char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, 
entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetUInt8((uint8_t*)pbuffer, entry.call); pbuffer += sizeof(entry.call); MmSetFloat_N((float*)pbuffer, entry.confidence); pbuffer += sizeof(entry.confidence); MmSetUInt8((uint8_t*)pbuffer, entry.force); pbuffer += sizeof(entry.force); MmSetUInt8((uint8_t*)pbuffer, entry.alleleCount); pbuffer += sizeof(entry.alleleCount); MmSetFloat_N((float*)pbuffer, entry.signalA); pbuffer += sizeof(entry.signalA); MmSetFloat_N((float*)pbuffer, entry.signalB); pbuffer += sizeof(entry.signalB); MmSetFloat_N((float*)pbuffer, entry.signalC); pbuffer += sizeof(entry.signalC); MmSetFloat_N((float*)pbuffer, entry.signalD); pbuffer += sizeof(entry.signalD); MmSetFloat_N((float*)pbuffer, entry.signalE); pbuffer += sizeof(entry.signalE); MmSetFloat_N((float*)pbuffer, entry.signalF); pbuffer += sizeof(entry.signalF); MmSetUInt8((uint8_t*)pbuffer, entry.contextA); pbuffer += sizeof(entry.contextA); MmSetUInt8((uint8_t*)pbuffer, entry.contextB); pbuffer += sizeof(entry.contextB); MmSetUInt8((uint8_t*)pbuffer, entry.contextC); pbuffer += sizeof(entry.contextC); MmSetUInt8((uint8_t*)pbuffer, entry.contextD); pbuffer += sizeof(entry.contextD); MmSetUInt8((uint8_t*)pbuffer, entry.contextE); pbuffer += sizeof(entry.contextE); MmSetUInt8((uint8_t*)pbuffer, entry.contextF); pbuffer += sizeof(entry.contextF); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const DmetCopyNumberData &entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = maxProbeSetNameLength[dataType] + sizeof(int) + sizeof(entry.call) + sizeof(entry.confidence) + sizeof(entry.force) + sizeof(entry.estimate) + sizeof(entry.lower) + sizeof(entry.upper) + GetMetricBufferSize(entry.metrics); } char *buffer = new 
char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t*)pbuffer, entry.name.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.name.c_str(), entry.name.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetUInt16_N((uint16_t*)pbuffer, entry.call); pbuffer += sizeof(entry.call); MmSetFloat_N((float*)pbuffer, entry.confidence); pbuffer += sizeof(entry.confidence); MmSetUInt16_N((uint16_t*)pbuffer, entry.force); pbuffer += sizeof(entry.force); MmSetFloat_N((float*)pbuffer, entry.estimate); pbuffer += sizeof(entry.estimate); MmSetFloat_N((float*)pbuffer, entry.lower); pbuffer += sizeof(entry.lower); MmSetFloat_N((float*)pbuffer, entry.upper); pbuffer += sizeof(entry.upper); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ChromosomeMultiDataSummaryData & entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = sizeof(entry.chr) + sizeof(int) + maxProbeSetNameLength[dataType] + sizeof(entry.startIndex) + sizeof(entry.markerCount) + sizeof(entry.minSignal) + sizeof(entry.maxSignal) + sizeof(entry.medianCnState) + sizeof(entry.homFrequency) + sizeof(entry.hetFrequency) + GetMetricBufferSize(entry.metrics); } char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt8((uint8_t*) pbuffer, entry.chr); pbuffer += sizeof(u_int8_t); MmSetUInt32_N((u_int32_t *)pbuffer, entry.display.length()); pbuffer += sizeof(int); memcpy(pbuffer, entry.display.c_str(), entry.display.length()); pbuffer += maxProbeSetNameLength[dataType]; MmSetUInt32_N((u_int32_t *)pbuffer, entry.startIndex); pbuffer += sizeof(u_int32_t); MmSetUInt32_N((u_int32_t *)pbuffer, entry.markerCount); pbuffer += 
sizeof(u_int32_t); MmSetFloat_N((float *)pbuffer, entry.minSignal); pbuffer += sizeof(float); MmSetFloat_N((float *)pbuffer, entry.maxSignal); pbuffer += sizeof(float); MmSetFloat_N((float *)pbuffer, entry.medianCnState); pbuffer += sizeof(float); MmSetFloat_N((float *)pbuffer, entry.homFrequency); pbuffer += sizeof(float); MmSetFloat_N((float *)pbuffer, entry.hetFrequency); pbuffer += sizeof(float); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ChromosomeSegmentData & entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = sizeof(entry.segmentId) + sizeof(entry.chr) + sizeof(entry.startPosition) + sizeof(entry.stopPosition) + sizeof(entry.markerCount) + sizeof(entry.meanMarkerDistance) + GetMetricBufferSize(entry.metrics); } char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.segmentId); pbuffer += sizeof(entry.segmentId); MmSetUInt8((uint8_t *)pbuffer, entry.chr); pbuffer += sizeof(entry.chr); MmSetUInt32_N((uint32_t *)pbuffer, entry.startPosition); pbuffer += sizeof(entry.startPosition); MmSetUInt32_N((uint32_t *)pbuffer, entry.stopPosition); pbuffer += sizeof(entry.stopPosition); MmSetUInt32_N((uint32_t *)pbuffer, entry.markerCount); pbuffer += sizeof(entry.markerCount); MmSetUInt32_N((uint32_t *)pbuffer, entry.meanMarkerDistance); pbuffer += sizeof(entry.meanMarkerDistance); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const 
affymetrix_calvin_data::ChromosomeSegmentDataEx & entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = sizeof(entry.segmentId) + sizeof(entry.chr) + sizeof(entry.startPosition) + sizeof(entry.stopPosition) + sizeof(entry.call) + sizeof(entry.confidence) + sizeof(entry.markerCount) + GetMetricBufferSize(entry.metrics) + sizeof(entry.referenceSampleKey) + sizeof(entry.familialSampleKey) + sizeof(entry.homozygosity) + sizeof(entry.heterozygosity); } char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, entry.segmentId); pbuffer += sizeof(entry.segmentId); MmSetUInt32_N((uint32_t *)pbuffer, entry.referenceSampleKey); pbuffer += sizeof(entry.referenceSampleKey); MmSetUInt32_N((uint32_t *)pbuffer, entry.familialSampleKey); pbuffer += sizeof(entry.familialSampleKey); MmSetUInt8((uint8_t *)pbuffer, entry.chr); pbuffer += sizeof(entry.chr); MmSetUInt32_N((uint32_t *)pbuffer, entry.startPosition); pbuffer += sizeof(entry.startPosition); MmSetUInt32_N((uint32_t *)pbuffer, entry.stopPosition); pbuffer += sizeof(entry.stopPosition); MmSetUInt8((uint8_t *)pbuffer, entry.call); pbuffer += sizeof(entry.call); MmSetFloat_N((float *)pbuffer, entry.confidence); pbuffer += sizeof(entry.confidence); MmSetUInt32_N((uint32_t *)pbuffer, entry.markerCount); pbuffer += sizeof(entry.markerCount); MmSetFloat_N((float *)pbuffer, entry.homozygosity); pbuffer += sizeof(entry.homozygosity); MmSetFloat_N((float *)pbuffer, entry.heterozygosity); pbuffer += sizeof(entry.heterozygosity); CopyMetricToBuffer(entry.metrics, pbuffer); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::FamilialSample& entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = 
sizeof(entry.sampleKey) + sizeof(int) + maxFamilialARRID[dataType] + sizeof(int) + maxFamilialCHPID[dataType] + sizeof(int) + (2*maxFamilialCHPFilename[dataType]) + sizeof(int) + maxFamilialRole[dataType] + sizeof(entry.roleConfidence) + sizeof(u_int8_t); } char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t*) pbuffer, entry.sampleKey); pbuffer += sizeof(u_int32_t); MmSetUInt32_N((uint32_t *)pbuffer, maxFamilialARRID[dataType]); pbuffer += sizeof(int); memcpy(pbuffer, entry.arrID.c_str(), entry.arrID.length()); pbuffer += maxFamilialARRID[dataType]; MmSetUInt32_N((uint32_t *)pbuffer, maxFamilialCHPID[dataType]); pbuffer += sizeof(int); memcpy(pbuffer, entry.chpID.c_str(), entry.chpID.length()); pbuffer += maxFamilialCHPID[dataType]; MmSetUInt32_N((uint32_t *)pbuffer, maxFamilialCHPFilename[dataType]); pbuffer += sizeof(int); for (int ilen=0; ilen<(int)entry.chpFilename.length(); ilen++) { u_int16_t cvalue = (u_int16_t) entry.chpFilename[ilen]; cvalue = htons(cvalue); memcpy((u_int16_t *)(pbuffer+ilen*sizeof(u_int16_t)), &cvalue, sizeof(u_int16_t)); } //memcpy(pbuffer, entry.chpFilename.c_str(), 2*entry.chpFilename.length()); pbuffer += (2*maxFamilialCHPFilename[dataType]); MmSetUInt32_N((uint32_t *)pbuffer, maxFamilialRole[dataType]); pbuffer += sizeof(int); memcpy(pbuffer, entry.role.c_str(), entry.role.length()); pbuffer += maxFamilialRole[dataType]; MmSetUInt8((uint8_t*) pbuffer, (entry.roleValidity == true ? 
1 : 0)); pbuffer += sizeof(u_int8_t); MmSetFloat_N((float*) pbuffer, entry.roleConfidence); pbuffer += sizeof(float); dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPMultiDataFileBufferWriter::WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::FamilialSegmentOverlap & entry) { if (dataBufferSz[dataType] == 0) { dataBufferSz[dataType] = sizeof(int) + maxSegmentType[dataType] + sizeof(int) + maxReferenceSegmentID[dataType] + sizeof(int) + maxFamilialSegmentID[dataType] + sizeof(entry.referenceSampleKey) + sizeof(entry.familialSampleKey); } char *buffer = new char[dataBufferSz[dataType]]; memset(buffer, 0, dataBufferSz[dataType]); char *pbuffer = buffer; MmSetUInt32_N((uint32_t *)pbuffer, maxSegmentType[dataType]); pbuffer += sizeof(int); memcpy(pbuffer, entry.segmentType.c_str(), entry.segmentType.length()); pbuffer += maxSegmentType[dataType]; MmSetUInt32_N((uint32_t*) pbuffer, entry.referenceSampleKey); pbuffer += sizeof(u_int32_t); MmSetUInt32_N((uint32_t *)pbuffer, maxReferenceSegmentID[dataType]); pbuffer += sizeof(int); memcpy(pbuffer, entry.referenceSegmentID.c_str(), entry.referenceSegmentID.length()); pbuffer += maxReferenceSegmentID[dataType]; MmSetUInt32_N((uint32_t*) pbuffer, entry.familialSampleKey); pbuffer += sizeof(u_int32_t); MmSetUInt32_N((uint32_t *)pbuffer, maxFamilialSegmentID[dataType]); pbuffer += sizeof(int); memcpy(pbuffer, entry.familialSegmentID.c_str(), entry.familialSegmentID.length()); pbuffer += maxFamilialSegmentID[dataType]; dataBuffers[dataType][target].push_back(buffer); m_BufferSize += dataBufferSz[dataType]; if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } static void ClearBuffer(std::vector &buffer) { int nbufs = (int) buffer.size(); for (int ibuf=0; ibuf 0) { MultiDataType segmentTypes[] = { SegmentCNMultiDataType, SegmentLOHMultiDataType, SegmentCNNeutralLOHMultiDataType, 
SegmentNormalDiploidMultiDataType, SegmentMosaicismMultiDataType, SegmentNoCallMultiDataType, SegmentGenotypeConcordanceMultiDataType, SegmentGenotypeDiscordanceMultiDataType, SegmentCNLossLOHConcordanceMultiDataType, SegmentCNNeutralLOHConcordanceMultiDataType, SegmentHeteroUPDMultiDataType, SegmentIsoUPDMultiDataType, SegmentDenovoCopyNumberMultiDataType, SegmentHemizygousParentOfOriginMultiDataType }; int nSegmentTypes = sizeof(segmentTypes) / sizeof(MultiDataType); for (int target = 0; target < (int)chpFileNames->size(); target++) { CalvinCHPFileUpdater updater; updater.OpenCHPFile((*chpFileNames)[target].c_str()); MultiDataType dataType; dataType = GenotypeMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateMultiDataGenotypeEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = CopyNumberMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateMultiDataCopyNumberEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = CytoMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateMultiDataCytoRegionEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = ExpressionMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { 
updater.UpdateMultiDataExpressionEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = CopyNumberVariationMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateMultiDataCopyNumberVariationRegionEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = DmetBiAllelicMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateDmetBiAllelicEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = DmetMultiAllelicMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateDmetMultiAllelicEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = DmetCopyNumberMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateDmetCopyNumberEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = ChromosomeSummaryMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && 
dataBuffers[dataType][target].size() > 0) { updater.UpdateChromosomeSummaryEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = CytoGenotypeCallMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateMultiDataCytoGenotypeEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } for (int iseg=0; iseg 0) { updater.UpdateChromosomeSegmentEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } } dataType = FamilialSegmentOverlapsMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateFamilialSegmentOverlapEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = FamilialSamplesMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateFamilialSampleEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = AllelePeaksMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { 
updater.UpdateMultiDataAllelePeaksEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } dataType = MarkerABSignalsMultiDataType; if (dataBuffers.find(dataType) != dataBuffers.end() && dataBuffers[dataType][target].size() > 0) { updater.UpdateMultiDataMarkerABSignalsEntryBuffer(dataType, rowIndexes[dataType][target], dataBufferSz[dataType], dataBuffers[dataType][target]); rowIndexes[dataType][target] += (int) dataBuffers[dataType][target].size(); ClearBuffer(dataBuffers[dataType][target]); } updater.CloseCHPFile(); } } m_BufferSize = 0; } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPMultiDataFileBufferWriter.h0000644000175200017520000002373314516003651031640 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file CalvinCHPMultiDataFileBufferWriter.h * @brief Class for writing multi-data data to a buffer before writing to CHP files. 
*/ #ifndef _CalvinCHPMultiDataFileBufferWriter_HEADER_ #define _CalvinCHPMultiDataFileBufferWriter_HEADER_ #include "calvin_files/data/src/CHPMultiDataData.h" #include "calvin_files/data/src/ProbeSetMultiDataData.h" // #include #include #include #include #include // #define MAX_BUFFER_SIZE 5242880 // 5 MB namespace affymetrix_calvin_io { class CHPMultiDataFileBufferWriter { public: /*! Constructor */ CHPMultiDataFileBufferWriter(); /*! Destructor */ ~CHPMultiDataFileBufferWriter(); /*! Set maximum buffer size */ void SetMaxBufferSize(int MaxBufferSize) { m_MaxBufferSize = MaxBufferSize; } /*! Cleans up memory */ void Cleanup(); /*! Initialize the buffer writer * @param CHPFileNames Reference to a list of CHP file names. * @param dataTypes The data types. * @param maxProbeSetNmLn The maximum probe set name length */ void Initialize(std::vector *CHPFileNames, std::vector &dataTypes, std::map &maxProbeSetNmLn); /*! Initialize the buffer writer * @param CHPFileNames Reference to a list of CHP file names. * @param dataTypes The data types. * @param maxSegmentTypeLn The maximum segment type length * @param maxReferenceSegmentIDLn The maximum ref sample key length * @param maxFamilialSegmentIDLn The maximum familial seg id length * @param maxFamilialARRIDLn The maximum familial arr id length * @param maxFamilialCHPIDLn The maximum familial sample chp id length * @param maxFamilialCHPFilenameLn The maximum familial sample chp file length * @param maxFamilialRoleLn The maximum familial sample role length */ void Initialize(std::vector *CHPFileNames, std::vector &dataTypes, std::map &maxSegmentTypeLn, std::map &maxReferenceSegmentIDLn, std::map &maxFamilialSegmentIDLn, std::map &maxFamilialARRIDLn, std::map &maxFamilialCHPIDLn, std::map &maxFamilialCHPFilenameLn, std::map &maxFamilialRoleLn); /*! Write a genotype entry to buffer. If the buffer is full, flush it. * @param dataType The data type. * @param target Target for the entry. 
* @param entry Value for the genotype entry. */
	void WriteMultiDataGenotypeEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry);

	/*! Write a copy number entry to buffer. If the buffer is full, flush it.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry Value for the copy number entry.
	 */
	void WriteMultiDataCopyNumberEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData &entry);

	/*! Write a cyto region entry to buffer. If the buffer is full, flush it.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry Value for the cyto region entry.
	 */
	void WriteMultiDataCytoRegionEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData &entry);

	/*! Write an expression entry to buffer. If the buffer is full, flush it.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry Value for the expression entry.
	 */
	void WriteMultiDataExpressionEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry);

	/*! Write a copy number variation region entry to buffer. If the buffer is full, flush it.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry Value for the copy number variation region entry.
	 */
	void WriteMultiDataCopyNumberVariationRegionEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry);

	/*! Write a DMET copy number entry to buffer. If the buffer is full, flush it.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry Value for the DMET copy number entry.
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::DmetCopyNumberData &entry);

	/*! Write a DMET multi-allelic entry to buffer. If the buffer is full, flush it.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry Value for the DMET multi-allelic entry.
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::DmetMultiAllelicData &entry);

	/*! Write a DMET bi-allelic entry to buffer. If the buffer is full, flush it.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry Value for the DMET bi-allelic entry.
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::DmetBiAllelicData &entry);

	/*! Write a chromosome summary entry.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ChromosomeMultiDataSummaryData & entry);

	/*! Write a chromosome segment entry to buffer. The buffer is flushed once
	 * the buffered byte count exceeds the maximum buffer size.
	 * @param dataType The data type.
	 * @param target Target for the entry; used as the index into the list of CHP file names when flushing.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ChromosomeSegmentData & entry);

	/*! Write an extended chromosome segment entry to buffer. The buffer is
	 * flushed once the buffered byte count exceeds the maximum buffer size.
	 * @param dataType The data type.
	 * @param target Target for the entry; used as the index into the list of CHP file names when flushing.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::ChromosomeSegmentDataEx & entry);

	/*! Write a familial segment overlap entry to buffer. The buffer is flushed
	 * once the buffered byte count exceeds the maximum buffer size.
	 * @param dataType The data type.
	 * @param target Target for the entry; used as the index into the list of CHP file names when flushing.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::FamilialSegmentOverlap & entry);

	/*! Write a familial sample entry to buffer. The buffer is flushed once the
	 * buffered byte count exceeds the maximum buffer size.
	 * @param dataType The data type.
	 * @param target Target for the entry; used as the index into the list of CHP file names when flushing.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::FamilialSample & entry);

	/*! Write an allele peaks entry.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::AllelePeaks &entry);

	/*! Write a marker AB signals entry.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::MarkerABSignals &entry);

	/*! Write a cyto genotype call entry.
	 * @param dataType The data type.
	 * @param target Target for the entry.
	 * @param entry The multi data entry (passed by reference).
	 */
	void WriteEntry(MultiDataType dataType, int target, const affymetrix_calvin_data::CytoGenotypeCallData &entry);

	/*! Write the content of the buffer to CHP files and reset the buffered byte count to zero. */
	void FlushBuffer();

private:

	/*! Initialize the buffer writer
	 * @param CHPFileNames Pointer to a list of CHP file names.
	 * @param dataTypes The data types.
	 * @param maxProbeSetNmLn The maximum probe set name length
	 * @param maxSegmentTypeLn The maximum segment type length
	 * @param maxReferenceSegmentIDLn The maximum reference segment id length
	 * @param maxFamilialSegmentIDLn The maximum familial seg id length
	 * @param maxFamilialARRIDLn The maximum familial arr id length
	 * @param maxFamilialCHPIDLn The maximum familial sample chp id length
	 * @param maxFamilialCHPFilenameLn The maximum familial sample chp file length
	 * @param maxFamilialRoleLn The maximum familial sample role length
	 */
	void Initialize(std::vector *CHPFileNames, std::vector &dataTypes, std::map &maxProbeSetNmLn, std::map &maxSegmentTypeLn, std::map &maxReferenceSegmentIDLn, std::map &maxFamilialSegmentIDLn, std::map &maxFamilialARRIDLn, std::map &maxFamilialCHPIDLn, std::map &maxFamilialCHPFilenameLn, std::map &maxFamilialRoleLn);

	/*! Pointer to list of CHP file names. */
	std::vector *chpFileNames;

	/*! Buffered records, indexed by data type and then by target. */
	std::map > > dataBuffers;

	/*! Row index at which the next flush starts writing, per data type and target;
	 * advanced on each flush by the number of records written. */
	std::map > rowIndexes;

	/*! Total number of bytes currently buffered; reset to zero when the buffer is flushed. */
	int m_BufferSize;

	/*! Maximum size of the buffer before it gets flushed */
	int m_MaxBufferSize;

	/*! The maximum probe set name length */
	std::map maxProbeSetNameLength;

	/*! The maximum segment type length */
	std::map maxSegmentType;

	/*! The maximum reference segment id length */
	std::map maxReferenceSegmentID;

	/*! The maximum familial seg id length */
	std::map maxFamilialSegmentID;

	/*! The maximum familial sample arr id length */
	std::map maxFamilialARRID;

	/*! The maximum familial sample chp id length */
	std::map maxFamilialCHPID;

	/*! 
The maximum familial sample chp file length */ std::map maxFamilialCHPFilename; /*! The maximum familial sample role length */ std::map maxFamilialRole; /*! The size of the buffer for each data type. */ std::map dataBufferSz; }; } #endif // _CalvinCHPMultiDataFileBufferWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPMultiDataFileUpdater.cpp0000644000175200017520000002356314516003651031172 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPMultiDataFileUpdater.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_data; using namespace std; /*! The data group index. */ #define MULTI_DATA_DATA_GROUP 0 /* * Initialize any needed members. */ CalvinCHPMultiDataFileUpdater::CalvinCHPMultiDataFileUpdater() : DataSetUpdater() { } /* * Clean up. 
*/
CalvinCHPMultiDataFileUpdater::~CalvinCHPMultiDataFileUpdater()
{
}

/* NOTE(review): the next line is damaged in this copy. The text between "ids" and "&metricColumns" is missing, so the rest of Initialize and the head of the following UpdateMultiData overload are not visible; the line is kept as found. */
void CalvinCHPMultiDataFileUpdater::Initialize(const char *file) { DataSetUpdater::Initialize(file); dataSetIndexMap.clear(); int nds=(int)dataSetNames[0].size(); int ndt=sizeof(MultiDataDataTypes) / sizeof(MultiDataType); for (int ids=0; ids &metricColumns) { int dsIndex = dataSetIndexMap[dataType]; UpdateMultiData(dataType, row, entry); UpdateMetrics(dsIndex, row, 3, entry.metrics, metricColumns); }

/*
 * Update a copy number row: chr goes to column 1, position to column 2 and
 * the metrics to the columns starting at 3. Column 0 is not written.
 * The data set index is looked up with operator[] on dataSetIndexMap
 * (presumably a data type that is not in the map resolves to index 0 --
 * TODO confirm against the map's value type).
 */
void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const ProbeSetMultiDataCopyNumberData &entry, const vector &metricColumns)
{
	int dsIndex = dataSetIndexMap[dataType];
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 1, entry.chr);
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 2, entry.position);
	UpdateMetrics(dsIndex, row, 3, entry.metrics, metricColumns);
}

/*
 * Update a cyto region row: chr, startPosition, stopPosition, call and
 * confidenceScore go to columns 1 through 5, the metrics to the columns
 * starting at 6. Column 0 is not written.
 */
void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const ProbeSetMultiDataCytoRegionData &entry, const vector &metricColumns)
{
	int dsIndex = dataSetIndexMap[dataType];
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 1, entry.chr);
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 2, entry.startPosition);
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 3, entry.stopPosition);
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 4, entry.call);
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 5, entry.confidenceScore);
	UpdateMetrics(dsIndex, row, 6, entry.metrics, metricColumns);
}

/*
 * Update only the quantification (column 1) of an expression row.
 */
void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const ProbeSetMultiDataExpressionData &entry)
{
	int dsIndex = dataSetIndexMap[dataType];
	Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 1, entry.quantification);
}

/* * Update the value for the given row. 
*/ void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const ProbeSetMultiDataExpressionData &entry, const vector &metricColumns) { int dsIndex = dataSetIndexMap[dataType]; UpdateMultiData(dataType, row, entry); UpdateMetrics(dsIndex, row, 2, entry.metrics, metricColumns); } void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry, const std::vector &metricColumns) { int dsIndex = dataSetIndexMap[dataType]; Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 1, entry.signal); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 2, entry.call); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 3, entry.confidenceScore); UpdateMetrics(dsIndex, row, 4, entry.metrics, metricColumns); } /* * Update the value for the given row. */ void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const DmetBiAllelicData &entry) { vector metricColumns; UpdateMultiData(dataType, row, entry, metricColumns); } /* * Update the value for the given row. */ void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const DmetBiAllelicData &entry, const vector &metricColumns) { int dsIndex = dataSetIndexMap[dataType]; Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 1, entry.call); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 2, entry.confidence); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 3, entry.force); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 4, entry.signalA); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 5, entry.signalB); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 6, entry.contextA); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 7, entry.contextB); if(metricColumns.size() > 0) { UpdateMetrics(dsIndex, row, 8, entry.metrics, metricColumns); } } /* * Update the value for the given row. 
*/ void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const DmetCopyNumberData &entry) { vector metricColumns; UpdateMultiData(dataType, row, entry, metricColumns); } /* * Update the value for the given row. */ void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const DmetCopyNumberData &entry, const vector &metricColumns) { int dsIndex = dataSetIndexMap[dataType]; Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 1, entry.call); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 2, entry.confidence); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 3, entry.force); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 4, entry.estimate); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 5, entry.lower); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 6, entry.upper); if(metricColumns.size() > 0) { UpdateMetrics(dsIndex, row, 7, entry.metrics, metricColumns); } } /* * Update the value for the given row. */ void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const DmetMultiAllelicData &entry) { vector metricColumns; UpdateMultiData(dataType, row, entry, metricColumns); } /* * Update the value for the given row. 
*/ void CalvinCHPMultiDataFileUpdater::UpdateMultiData(MultiDataType dataType, int row, const DmetMultiAllelicData &entry, const vector &metricColumns) { int dsIndex = dataSetIndexMap[dataType]; Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 1, entry.call); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 2, entry.confidence); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 3, entry.force); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 4, entry.alleleCount); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 5, entry.signalA); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 6, entry.signalB); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 7, entry.signalC); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 8, entry.signalD); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 9, entry.signalE); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 10, entry.signalF); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 11, entry.contextA); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 12, entry.contextB); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 13, entry.contextC); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 14, entry.contextD); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 15, entry.contextE); Update(MULTI_DATA_DATA_GROUP, dsIndex, row, 16, entry.contextF); if(metricColumns.size() > 0) { UpdateMetrics(dsIndex, row, 17, entry.metrics, metricColumns); } } void CalvinCHPMultiDataFileUpdater::UpdateMetrics(int dataSetIndex, int row, int startColIndex, const std::vector &metrics, const std::vector &metricColumns) { int colIndex=0; for (vector::const_iterator it=metrics.begin(); it!=metrics.end(); it++) { switch (metricColumns[colIndex].GetColumnType()) { case ByteColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueInt8()); break; case UByteColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueUInt8()); break; case ShortColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueInt16()); break; case UShortColType: 
Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueUInt16()); break; case IntColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueInt32()); break; case UIntColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueUInt32()); break; case FloatColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueFloat()); break; case ASCIICharColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueAscii()); break; case UnicodeCharColType: Update(MULTI_DATA_DATA_GROUP, dataSetIndex, row, colIndex+startColIndex, it->GetValueText()); break; } ++colIndex; } } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPMultiDataFileUpdater.h0000644000175200017520000001473614516003651030641 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file CalvinCHPMultiDataFileUpdater.h Provides interfaces to update data in a "Calvin" binary "MultiData" data file. 
*/ #ifndef _CalvinCHPMultiDataFileUpdater_HEADER_ #define _CalvinCHPMultiDataFileUpdater_HEADER_ #include "calvin_files/data/src/CHPMultiDataData.h" #include "calvin_files/data/src/ColumnInfo.h" #include "calvin_files/data/src/ProbeSetMultiDataData.h" #include "calvin_files/writers/src/DataSetUpdater.h" // #include #include // namespace affymetrix_calvin_io { /*! Provides interfaces to update data in a "Calvin" binary "MultiData" data file. * This class assumes that the file has been completely written and that * only existing data groups/sets/rows/cols are being modified. */ class CalvinCHPMultiDataFileUpdater : public DataSetUpdater { public: /*! Constructor */ CalvinCHPMultiDataFileUpdater(); /*! Destructor */ ~CalvinCHPMultiDataFileUpdater(); /*! Initialize the class given a "calvin" data file. * @param file The name of the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ void Initialize(const char *file); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. * @param metricColumns The column information. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry, const std::vector &metricColumns); /*! Update only the call and confidence for the given row * @param row The row index. * @param entry The new MultiData value. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData &entry); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. * @param metricColumns The column information. 
*/ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData &entry, const std::vector &metricColumns); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData &entry, const std::vector &metricColumns); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. * @param metricColumns The column information. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry, const std::vector &metricColumns); /*! Update only the call and confidence for the given row * @param row The row index. * @param entry The new MultiData value. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataExpressionData &entry); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData &entry, const std::vector &metricColumns); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. * @param metricColumns The column information. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetBiAllelicData &entry, const std::vector &metricColumns); /*! Update only the call and confidence for the given row * @param row The row index. * @param entry The new MultiData value. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetBiAllelicData &entry); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. * @param metricColumns The column information. 
*/ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetCopyNumberData &entry, const std::vector &metricColumns); /*! Update only the call and confidence for the given row * @param row The row index. * @param entry The new MultiData value. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetCopyNumberData &entry); /*! Update the entry for the given row * @param row The row index. * @param entry The new MultiData value. * @param metricColumns The column information. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetMultiAllelicData &entry, const std::vector &metricColumns); /*! Update only the call and confidence for the given row * @param row The row index. * @param entry The new MultiData value. */ void UpdateMultiData(MultiDataType dataType, int row, const affymetrix_calvin_data::DmetMultiAllelicData &entry); private: std::map dataSetIndexMap; /*! Update the metrics for the given row * @param row The row index. * @param dataSetIndex The data set index * @param startColIndex The starting column * @param metricColumns The column information. */ void UpdateMetrics(int dataSetIndex, int row, int startColIndex, const std::vector &metrics, const std::vector &metricColumns); }; } #endif // _CalvinCHPMultiDataFileUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPMultiDataFileWriter.cpp0000644000175200017520000003030114516003651031026 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#include "calvin_files/writers/src/CalvinCHPMultiDataFileWriter.h"
//
#include "calvin_files/data/src/CHPMultiDataData.h"
//

using namespace std;
using namespace affymetrix_calvin_io;
using namespace affymetrix_calvin_parameter;

/*
 * Remember the data object, build the two lookups between data type and
 * data set index from its data set info, create the generic file writer
 * from its file header and write all headers straight away.
 */
CHPMultiDataFileWriter::CHPMultiDataFileWriter(CHPMultiDataData &p)
{
    data = &p;
    // NOTE(review): the template arguments of `map` on the next two lines
    // appear to have been lost in this copy of the file - restore them from
    // the upstream Fusion SDK sources before compiling.
    map &info = p.GetDataSetInfo();
    map::iterator it;
    for (it=info.begin(); it!=info.end(); it++)
    {
        dataTypeToIndex[it->first] = it->second.dataSetIndex;
        indexToDataType[it->second.dataSetIndex] = it->first;
    }
    writer = new GenericFileWriter(p.GetFileHeader());
    WriteHeaders();
}

/*
 * Release the file writer. `data` points at the object handed to the
 * constructor and is only cleared here, never deleted.
 */
CHPMultiDataFileWriter::~CHPMultiDataFileWriter()
{
    data = NULL;
    delete writer;
}

/*
 * Write the file header followed by the data group and data set headers,
 * storing in entryPos the offset returned by SetFilePositions() for the
 * data type mapped to each data set index.
 *
 * NOTE(review): both `for` headers below are truncated in this copy (the
 * text from the loop condition's opening angle bracket up to the next
 * closing one is missing, which also swallowed the opening braces and the
 * statements that bind `dataGroupWriter` / `dataSetWriter`). The tokens are
 * reproduced as found; restore the loops from the upstream sources.
 */
void CHPMultiDataFileWriter::WriteHeaders()
{
    writer->WriteHeader();
    int ng = writer->GetDataGroupCnt();
    for (int ig=0; igGetDataGroupWriter(ig);
    dataGroupWriter.WriteHeader();
    int32_t n = dataGroupWriter.GetDataSetWriterCnt();
    for (int32_t i=0; iWriteHeader();
    entryPos[indexToDataType[i]] = SetFilePositions();
    dataGroupWriter.UpdateNextDataGroupPos();
    }
    }
}

/*
 * Reserve the space of the current data set in the file.
 * Returns the file position before the skip, i.e. where the rows of the
 * data set will be written.
 */
int32_t CHPMultiDataFileWriter::SetFilePositions()
{
    int32_t dataSetSz = dataSetWriter->GetDataSetSize();
    int32_t offset = writer->GetFilePos();
    // NOTE(review): the reason for skipping one byte more than the data set
    // size is not visible here - confirm against the data set writer.
    writer->SeekFromCurrentPos(dataSetSz + 1);
    dataSetWriter->UpdateNextDataSetOffset();
    return offset;
}

/*
 * Select the data set that subsequent WriteEntry() calls write to.
 * Seeks to the stored entry position of the data type, binds the matching
 * data set writer and caches the fixed column widths needed for it.
 */
void CHPMultiDataFileWriter::SeekToDataSet(MultiDataType dataType)
{
    int ig = data->GetDataGroupIndex(dataType);
    DataGroupWriter &dataGroupWriter = writer->GetDataGroupWriter(ig);
    writer->SeekFromBeginPos(entryPos[dataType]);
    dataSetWriter = &dataGroupWriter.GetDataSetWriter(dataTypeToIndex[dataType]);
    maxName = data->GetMaxProbeSetName(dataType);
    // The familial data sets carry additional fixed-width text columns.
    // NOTE(review): `std::map` below has lost its template arguments in this copy.
    if (dataType == FamilialSegmentOverlapsMultiDataType)
    {
        std::map &dinfo = data->GetDataSetInfo();
        maxSegmentType = dinfo[dataType].maxSegmentType;
        maxReferenceSegmentID = dinfo[dataType].maxReferenceSegmentID;
        maxFamilialSegmentID = dinfo[dataType].maxFamilialSegmentID;
    }
    else if (dataType == FamilialSamplesMultiDataType)
    {
        std::map &dinfo = data->GetDataSetInfo();
        maxFamilialARRID = dinfo[dataType].maxFamilialARRID;
        maxFamilialCHPID = dinfo[dataType].maxFamilialCHPID;
        maxFamilialCHPFile = dinfo[dataType].maxFamilialCHPFile;
        maxFamilialRole = dinfo[dataType].maxFamilialRole;
    }
    currentDataType = dataType;
}

/*
 * All WriteEntry() overloads follow the same pattern: seek to the stored
 * position of the data type chosen by SeekToDataSet(), write the fixed
 * columns in order (text columns with their cached maximum length), write
 * the extra metric columns where the entry has them, and store the new
 * file position so the next entry of that data type is appended after it.
 */

/* Genotype entry: name, call, confidence, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidence);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Copy number entry: name, chr, position, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.chr);
    dataSetWriter->Write(p.position);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Allele peaks entry: name, chr, position, then the peaks as metric columns. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::AllelePeaks & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.chr);
    dataSetWriter->Write(p.position);
    WriteMetrics(p.peaks);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Marker A/B signals entry: index, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::MarkerABSignals & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.index);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Cyto genotype call entry: index, call, confidence, forced call, signals, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::CytoGenotypeCallData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.index);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidence);
    dataSetWriter->Write(p.forcedCall);
    dataSetWriter->Write(p.aSignal);
    dataSetWriter->Write(p.bSignal);
    dataSetWriter->Write(p.signalStrength);
    dataSetWriter->Write(p.contrast);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Cyto region entry: name, chr, start/stop position, call, confidence score, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.chr);
    dataSetWriter->Write(p.startPosition);
    dataSetWriter->Write(p.stopPosition);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidenceScore);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Expression entry: name, quantification, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataExpressionData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.quantification);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Copy number variation region entry: name, signal, call, confidence score, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.signal);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidenceScore);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* DMET bi-allelic entry: name, call, confidence, force, A/B signals, A/B contexts, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::DmetBiAllelicData &p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidence);
    dataSetWriter->Write(p.force);
    dataSetWriter->Write(p.signalA);
    dataSetWriter->Write(p.signalB);
    dataSetWriter->Write(p.contextA);
    dataSetWriter->Write(p.contextB);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* DMET copy number entry: name, call, confidence, force, estimate, lower, upper, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::DmetCopyNumberData &p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidence);
    dataSetWriter->Write(p.force);
    dataSetWriter->Write(p.estimate);
    dataSetWriter->Write(p.lower);
    dataSetWriter->Write(p.upper);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* DMET multi-allelic entry: name, call, confidence, force, allele count, signals A-F, contexts A-F, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::DmetMultiAllelicData &p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.name, maxName);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidence);
    dataSetWriter->Write(p.force);
    dataSetWriter->Write(p.alleleCount);
    dataSetWriter->Write(p.signalA);
    dataSetWriter->Write(p.signalB);
    dataSetWriter->Write(p.signalC);
    dataSetWriter->Write(p.signalD);
    dataSetWriter->Write(p.signalE);
    dataSetWriter->Write(p.signalF);
    dataSetWriter->Write(p.contextA);
    dataSetWriter->Write(p.contextB);
    dataSetWriter->Write(p.contextC);
    dataSetWriter->Write(p.contextD);
    dataSetWriter->Write(p.contextE);
    dataSetWriter->Write(p.contextF);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Chromosome summary entry; the display text uses the name column width (maxName). */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ChromosomeMultiDataSummaryData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.chr);
    dataSetWriter->Write(p.display, maxName);
    dataSetWriter->Write(p.startIndex);
    dataSetWriter->Write(p.markerCount);
    dataSetWriter->Write(p.minSignal);
    dataSetWriter->Write(p.maxSignal);
    dataSetWriter->Write(p.medianCnState);
    dataSetWriter->Write(p.homFrequency);
    dataSetWriter->Write(p.hetFrequency);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Chromosome segment entry: segment id, chr, start/stop position, marker count, mean marker distance, metrics. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ChromosomeSegmentData & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.segmentId);
    dataSetWriter->Write(p.chr);
    dataSetWriter->Write(p.startPosition);
    dataSetWriter->Write(p.stopPosition);
    dataSetWriter->Write(p.markerCount);
    dataSetWriter->Write(p.meanMarkerDistance);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/* Extended chromosome segment entry, including the reference/familial sample keys. */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::ChromosomeSegmentDataEx & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.segmentId);
    dataSetWriter->Write(p.referenceSampleKey);
    dataSetWriter->Write(p.familialSampleKey);
    dataSetWriter->Write(p.chr);
    dataSetWriter->Write(p.startPosition);
    dataSetWriter->Write(p.stopPosition);
    dataSetWriter->Write(p.call);
    dataSetWriter->Write(p.confidence);
    dataSetWriter->Write(p.markerCount);
    dataSetWriter->Write(p.homozygosity);
    dataSetWriter->Write(p.heterozygosity);
    WriteMetrics(p.metrics);
    entryPos[currentDataType] = writer->GetFilePos();
}

/*
 * Familial sample entry. Uses the column widths cached by SeekToDataSet()
 * for FamilialSamplesMultiDataType; no metric columns are written.
 */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::FamilialSample & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.sampleKey);
    dataSetWriter->Write(p.arrID, maxFamilialARRID);
    dataSetWriter->Write(p.chpID, maxFamilialCHPID);
    dataSetWriter->Write(p.chpFilename, maxFamilialCHPFile);
    dataSetWriter->Write(p.role, maxFamilialRole);
    // The validity flag is stored as a u_int8_t holding 1 or 0.
    dataSetWriter->Write((u_int8_t)(p.roleValidity == true ? 1 : 0));
    dataSetWriter->Write(p.roleConfidence);
    entryPos[currentDataType] = writer->GetFilePos();
}

/*
 * Familial segment overlap entry. Uses the column widths cached by
 * SeekToDataSet() for FamilialSegmentOverlapsMultiDataType; no metric
 * columns are written.
 */
void CHPMultiDataFileWriter::WriteEntry(const affymetrix_calvin_data::FamilialSegmentOverlap & p)
{
    writer->SeekFromBeginPos(entryPos[currentDataType]);
    dataSetWriter->Write(p.segmentType, maxSegmentType);
    dataSetWriter->Write(p.referenceSampleKey);
    dataSetWriter->Write(p.referenceSegmentID, maxReferenceSegmentID);
    dataSetWriter->Write(p.familialSampleKey);
    dataSetWriter->Write(p.familialSegmentID, maxFamilialSegmentID);
    entryPos[currentDataType] = writer->GetFilePos();
}

/*
 * Write the extra metric columns of an entry, one value per column, using
 * the writer overload that matches the value's parameter type. Ascii and
 * text values are written with the column length the data object reports
 * for (currentDataType, column index); unknown types write nothing.
 *
 * NOTE(review): the element type of `std::vector` in the signature and the
 * `for` header are truncated in this copy (the loop condition, the binding
 * of `nv`, the `switch` header and the first `case` label are missing).
 * The tokens are reproduced as found; restore from the upstream sources.
 */
void CHPMultiDataFileWriter::WriteMetrics(const std::vector &metrics)
{
    int ncols = (int) metrics.size();
    for (int icol=0; icolWrite(nv.GetValueInt8());
    break;

    case ParameterNameValueType::UInt8Type:
        dataSetWriter->Write(nv.GetValueUInt8());
        break;

    case ParameterNameValueType::Int16Type:
        dataSetWriter->Write(nv.GetValueInt16());
        break;

    case ParameterNameValueType::UInt16Type:
        dataSetWriter->Write(nv.GetValueUInt16());
        break;

    case ParameterNameValueType::Int32Type:
        dataSetWriter->Write(nv.GetValueInt32());
        break;

    case ParameterNameValueType::UInt32Type:
        dataSetWriter->Write(nv.GetValueUInt32());
        break;

    case ParameterNameValueType::FloatType:
        dataSetWriter->Write(nv.GetValueFloat());
        break;

    case ParameterNameValueType::AsciiType:
        dataSetWriter->Write(nv.GetValueAscii(), data->GetMetricColumnLength(currentDataType, icol));
        break;

    case ParameterNameValueType::TextType:
        dataSetWriter->Write(nv.GetValueText(), data->GetMetricColumnLength(currentDataType, icol));
        break;

    // @todo check this is ok.
    case ParameterNameValueType::UnknownType:
        break;
    }
    }
}
affxparser/src/fusion/calvin_files/writers/src/CalvinCHPMultiDataFileWriter.h0000644000175200017520000001321014516003651030473 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2007 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#ifndef _CHPMultiDataFileWriter_HEADER_
#define _CHPMultiDataFileWriter_HEADER_

/*! \file CalvinCHPMultiDataFileWriter.h Contains classes to write a multi data CHP file.
 */

#include "calvin_files/data/src/CHPMultiDataData.h"
#include "calvin_files/data/src/ProbeSetMultiDataData.h"
#include "calvin_files/writers/src/GenericFileWriter.h"
//
// NOTE(review): the three system header names below are missing in this copy
// of the file (the angle-bracketed names were lost) - restore from upstream.
#include
#include
#include
//

namespace affymetrix_calvin_io
{

/*! A class to write a multi data CHP file.
 *
 * Usage as far as the implementation shows: construct with the data object
 * (this writes all headers), call SeekToDataSet() to choose a data type and
 * then call the matching WriteEntry() overload once per row.
 *
 * NOTE(review): the template arguments of the std::map / std::vector members
 * and parameters below are missing in this copy - restore from upstream.
 */
class CHPMultiDataFileWriter
{
private:
    /*! The file writer (created in the constructor, deleted in the destructor). */
    GenericFileWriter* writer;

    /*! The data set writer currently selected for writing. */
    DataSetWriter* dataSetWriter;

    /*! The file position of the next entry for each data type. */
    std::map entryPos;

    /*! A map of data type to data set index. */
    std::map dataTypeToIndex;

    /*! A map of data set index to data type */
    std::map indexToDataType;

    /*! The maximum length of the name column. */
    int maxName;

    /*! The maximum length of the familial seg type column. */
    int maxSegmentType;

    /*! The maximum length of the familial ref seg id column. */
    int maxReferenceSegmentID;

    /*! The maximum length of the familial seg id column. */
    int maxFamilialSegmentID;

    /*! The maximum length of the familial sample ARR id */
    int maxFamilialARRID;

    /*! The maximum length of the familial sample CHP id */
    int maxFamilialCHPID;

    /*! The maximum length of the familial sample CHP file name */
    int maxFamilialCHPFile;

    /*! The maximum length of the familial sample role */
    int maxFamilialRole;

    /*! The data type being written (set by SeekToDataSet). */
    MultiDataType currentDataType;

    /*! The data object passed to the constructor; not owned by the writer. */
    CHPMultiDataData *data;

    /*! Write the metrics to the file.
     * @param metrics Other metrics associated with the entry, one value per metric column.
     */
    void WriteMetrics(const std::vector &metrics);

public:

    /*! Constructor. Writes the file, data group and data set headers.
     * @param p Reference to the multi data CHP data object; it must outlive the writer.
     */
    CHPMultiDataFileWriter(CHPMultiDataData& p);

    /*! Destructor. Deletes the file writer. */
    ~CHPMultiDataFileWriter();

    /*! Write a genotype entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData & p);

    /*! Write a copy number entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData & p);

    /*! Write a cyto region entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData & p);

    /*! Write an expression entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataExpressionData & p);

    /*! Write a copy number variation region entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberVariationRegionData & p);

    /*! Write a DMET bi-allelic entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::DmetBiAllelicData & p);

    /*! Write a DMET multi-allelic entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::DmetMultiAllelicData & p);

    /*! Write a DMET copy number entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::DmetCopyNumberData & p);

    /*! Write a chromosome summary entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ChromosomeMultiDataSummaryData & p);

    /*! Write a chromosome segment entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ChromosomeSegmentData & p);

    /*! Write an extended chromosome segment entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::ChromosomeSegmentDataEx & p);

    /*! Write a familial sample entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::FamilialSample & p);

    /*! Write a familial segment overlap entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::FamilialSegmentOverlap & p);

    /*! Write an allele peaks entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::AllelePeaks & p);

    /*! Write a marker A/B signals entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::MarkerABSignals & p);

    /*! Write a cyto genotype call entry to the data set selected by SeekToDataSet.
     * @param p The entry to write.
     */
    void WriteEntry(const affymetrix_calvin_data::CytoGenotypeCallData & p);

    /*! Seeks to the data set and makes it the target of subsequent WriteEntry calls.
     * @param dataType The data type
     */
    void SeekToDataSet(MultiDataType dataType);

private:

    /*! Writes the headers to the file. */
    void WriteHeaders();

    /*! Reserves the space of the current data set in the file.
     * @return The file position at which the data set's rows start.
     */
    int32_t SetFilePositions();
};

}

#endif // _CHPMultiDataFileWriter_HEADER_
affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationDetectionFileBufferWriter.cpp0000644000175200017520000000633714516003651035125 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

/**
 * @file CalvinCHPQuantificationDetectionFileBufferWriter.cpp
 * @author David Le
 * @date Mon May 15 12:09:42 2006
 *
 * @brief Class for writing quantifications to a buffer before writing to CHP files.
 */

#include "calvin_files/writers/src/CalvinCHPQuantificationDetectionFileBufferWriter.h"
//
#include "calvin_files/writers/src/CalvinCHPFileUpdater.h"
//

using namespace affymetrix_calvin_io;

/*
 * Start with an empty buffer and the default flush threshold (MAX_BUFFER_SIZE).
 */
CHPQuantificationDetectionFileBufferWriter::CHPQuantificationDetectionFileBufferWriter()
{
    m_BufferSize = 0;
    m_MaxBufferSize = MAX_BUFFER_SIZE;
}

/*
 * Write out anything still buffered, then release the buffers.
 */
CHPQuantificationDetectionFileBufferWriter::~CHPQuantificationDetectionFileBufferWriter()
{
    FlushBuffer();
    Cleanup();
}

/*
 * NOTE(review): this copy of the file has lost everything between the loop
 * condition of Cleanup() and the parameter list of Initialize() (the text
 * from an opening angle bracket up to the next closing one is missing), so
 * the rest of Cleanup() and the head of Initialize() are fused below. The
 * tokens are reproduced as found; restore both functions from the upstream
 * Fusion SDK sources before compiling.
 *
 * What is still visible of Initialize(): it stores the pointer to the CHP
 * file names, calls Cleanup(), appends one empty buffer and one row index
 * of 0 per loop iteration, and resets the buffered byte count.
 */
void CHPQuantificationDetectionFileBufferWriter::Cleanup()
{
    for (int target=0; target *CHPFileNames)
{
    m_CHPFileNames = CHPFileNames;
    Cleanup();
    // NOTE(review): the loop condition is truncated here as well.
    for (int i=0; isize(); i++)
    {
        // NOTE(review): the element type of this vector is missing in this copy.
        std::vector quantificationBuffer;
        m_TargetQuantificationDetectionBuffers.push_back(quantificationBuffer);
        m_TargetQuantificationDetectionRowIndexes.push_back(0);
    }
    m_BufferSize = 0;
}

/*
 * Append one quantification value to the buffer of the given target and
 * flush all buffers once the buffered byte count exceeds the maximum.
 * `target` is used as an index without a range check.
 */
void CHPQuantificationDetectionFileBufferWriter::WriteQuantificationEntry(int target, float quantification)
{
    m_TargetQuantificationDetectionBuffers[target].push_back(quantification);
    m_BufferSize += sizeof(float);
    if (m_BufferSize > m_MaxBufferSize)
    {
        FlushBuffer();
    }
}

/*
 * Write the buffered values of every target to its CHP file: open the file
 * with a CalvinCHPFileUpdater, write the buffer starting at the target's
 * current row index, close the file, advance the row index by the number of
 * values written and clear the buffer. Does nothing when nothing is buffered.
 *
 * NOTE(review): the loop condition is truncated in this copy; only its tail
 * (`size(); target++`) survived.
 */
void CHPQuantificationDetectionFileBufferWriter::FlushBuffer()
{
    if(m_BufferSize > 0)
    {
        for (int target=0; targetsize(); target++)
        {
            CalvinCHPFileUpdater updater;
            updater.OpenCHPFile((*m_CHPFileNames)[target].c_str());
            updater.UpdateExpressionQuantificationBuffer(m_TargetQuantificationDetectionRowIndexes[target], m_TargetQuantificationDetectionBuffers[target]);
            updater.CloseCHPFile();
            m_TargetQuantificationDetectionRowIndexes[target] += m_TargetQuantificationDetectionBuffers[target].size();
            m_TargetQuantificationDetectionBuffers[target].clear();
        }
    }
    m_BufferSize = 0;
}
affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationDetectionFileBufferWriter.h0000644000175200017520000000526614516003651034572 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

/**
 * @file CalvinCHPQuantificationDetectionFileBufferWriter.h
 * @author David Le
 * @date Mon May 15 12:09:42 2006
 *
 * @brief Class for writing Quantifications to a buffer before writing to CHP files.
 */

#ifndef _CHPQUANTIFICATIONDETECTIONFILEBUFFERWRITER_HEADER_
#define _CHPQUANTIFICATIONDETECTIONFILEBUFFERWRITER_HEADER_

// NOTE(review): the three system header names below are missing in this copy
// of the file (the angle-bracketed names were lost) - restore from upstream.
#include
#include
#include
//

#define MAX_BUFFER_SIZE 5242880 // 5 MB

namespace affymetrix_calvin_io
{

/*! Buffers quantification values per target (one target per CHP file) and
 * writes them to the CHP files when the buffer exceeds its maximum size,
 * on FlushBuffer() and on destruction.
 *
 * NOTE(review): the element types of the std::vector members and parameters
 * below are missing in this copy - restore from upstream.
 */
class CHPQuantificationDetectionFileBufferWriter
{
public:
    /*! Constructor */
    CHPQuantificationDetectionFileBufferWriter();

    /*! Destructor. Flushes the buffer and cleans up. */
    ~CHPQuantificationDetectionFileBufferWriter();

    /*! Set maximum buffer size
     * @param MaxBufferSize The buffered byte count above which the buffer is flushed.
     */
    void SetMaxBufferSize(int MaxBufferSize) { m_MaxBufferSize = MaxBufferSize; }

    /*! Cleans up memory */
    void Cleanup();

    /*! Initialize Quantification buffer writer
     * @param CHPFileNames Pointer to a list of CHP file names. The pointer is
     *        stored and dereferenced on every flush, so the list must outlive
     *        this object.
     */
    void Initialize(std::vector *CHPFileNames);

    /*! Write a quantification entry to buffer. If the buffer is full, flush it.
     * @param target Target for the quantification entry (index into the list of CHP files).
     * @param quantification Value for the quantification entry.
     */
    void WriteQuantificationEntry(int target, float quantification);

    /*! Write the content of the buffer to CHP files. */
    void FlushBuffer();

private:
    // Pointer to list of CHP file names (not owned).
    std::vector *m_CHPFileNames;

    // One buffer of quantification entries per target.
    std::vector< std::vector > m_TargetQuantificationDetectionBuffers;

    // For each target, the row index at which the next flush starts writing.
    std::vector m_TargetQuantificationDetectionRowIndexes;

    // Size of the current buffer in bytes.
    int m_BufferSize;

    // Maximum size of the buffer before it gets flushed
    int m_MaxBufferSize;
};

}

#endif // _CHPQUANTIFICATIONDETECTIONFILEBUFFERWRITER_HEADER_
affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationDetectionFileUpdater.cpp0000644000175200017520000000421114516003651034110 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2006 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPQuantificationDetectionFileUpdater.h" // using namespace affymetrix_calvin_io; using namespace std; /*! The data set index. */ #define QUANTIFICATION_DETECTION_DATA_SET 0 /*! The data group index. */ #define QUANTIFICATION_DETECTION_DATA_GROUP 0 /*! The quantification column. */ #define QUANTIFICATION_DETECTION_QUANTIFICATION_COLUMN 1 /*! The quantification column. */ #define QUANTIFICATION_DETECTION_DETECTION_COLUMN 2 /* * Initialize any needed members. */ CalvinCHPQuantificationDetectionFileUpdater::CalvinCHPQuantificationDetectionFileUpdater() : DataSetUpdater() { } /* * Clean up. */ CalvinCHPQuantificationDetectionFileUpdater::~CalvinCHPQuantificationDetectionFileUpdater() { } /* * Update the value for the given row. */ void CalvinCHPQuantificationDetectionFileUpdater::UpdateQuantification(int row, float quantification) { Update(QUANTIFICATION_DETECTION_DATA_GROUP, QUANTIFICATION_DETECTION_DATA_SET, row, QUANTIFICATION_DETECTION_QUANTIFICATION_COLUMN, quantification); } /* * Update the value for the given row. */ void CalvinCHPQuantificationDetectionFileUpdater::UpdateDetection(int row, float detection) { Update(QUANTIFICATION_DETECTION_DATA_GROUP, QUANTIFICATION_DETECTION_DATA_SET, row, QUANTIFICATION_DETECTION_DETECTION_COLUMN, detection); } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationDetectionFileUpdater.h0000644000175200017520000000411514516003651033560 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file CalvinCHPQuantificationDetectionFileUpdater.h Provides interfaces to update data in a "Calvin" binary "quantification/detection" data file. */ #ifndef _CalvinCHPQuantificationDetectionFileUpdater_HEADER_ #define _CalvinCHPQuantificationDetectionFileUpdater_HEADER_ #include "calvin_files/writers/src/DataSetUpdater.h" // namespace affymetrix_calvin_io { /*! Provides interfaces to update data in a "Calvin" binary "quantification/detection" data file. * This class assumes that the file has been completely written and that * only existing data groups/sets/rows/cols are being modified. */ class CalvinCHPQuantificationDetectionFileUpdater : public DataSetUpdater { public: /*! Constructor */ CalvinCHPQuantificationDetectionFileUpdater(); /*! Destructor */ ~CalvinCHPQuantificationDetectionFileUpdater(); /*! Update the quantification value for the given row * @param row The row index. * @param quantification The new quantification value. */ void UpdateQuantification(int row, float quantification); /*! Update the detection value for the given row * @param row The row index. * @param pvalue The new p-value. 
*/ void UpdateDetection(int row, float pvalue); }; } #endif // _CalvinCHPQuantificationDetectionFileUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationDetectionFileWriter.cpp0000644000175200017520000000516514516003651033771 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPQuantificationDetectionFileWriter.h" // #include "calvin_files/data/src/CHPQuantificationDetectionData.h" // using namespace affymetrix_calvin_io; CHPQuantificationDetectionFileWriter::CHPQuantificationDetectionFileWriter(CHPQuantificationDetectionData &p) { maxProbeSetName = p.GetMaxProbeSetName(); writer = new GenericFileWriter(p.GetFileHeader()); WriteHeaders(); } CHPQuantificationDetectionFileWriter::~CHPQuantificationDetectionFileWriter() { delete writer; } void CHPQuantificationDetectionFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriter &dataGroupWriter = writer->GetDataGroupWriter(0); dataGroupWriter.WriteHeader(); //int iSet=0; DataSetWriterIt beginSet; DataSetWriterIt endSet; dataGroupWriter.GetDataSetWriters(beginSet, endSet); if (beginSet != endSet) { dataSetWriter = &(*beginSet); 
beginSet->WriteHeader(); entryPos = SetFilePositions(); dataGroupWriter.UpdateNextDataGroupPos(); } } void CHPQuantificationDetectionFileWriter::SeekToDataSet() { DataGroupWriter &dataGroupWriter = writer->GetDataGroupWriter(0); dataSetWriter = &dataGroupWriter.GetDataSetWriter(0); writer->SeekFromBeginPos(entryPos); } void CHPQuantificationDetectionFileWriter::WriteEntry(const affymetrix_calvin_data::ProbeSetQuantificationDetectionData& p) { if (maxProbeSetName == -1) dataSetWriter->Write(p.id); else dataSetWriter->Write(p.name, maxProbeSetName); dataSetWriter->Write(p.quantification); dataSetWriter->Write(p.pvalue); } int32_t CHPQuantificationDetectionFileWriter::SetFilePositions() { int32_t dataSetSz = dataSetWriter->GetDataSetSize(); int32_t offset = writer->GetFilePos(); writer->SeekFromCurrentPos(dataSetSz + 1); dataSetWriter->UpdateNextDataSetOffset(); return offset; } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationDetectionFileWriter.h0000644000175200017520000000452414516003651033434 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPQuantificationDetectionFileWriter_HEADER_ #define _CHPQuantificationDetectionFileWriter_HEADER_ /*! \file CalvinCHPQuantificationDetectionFileWriter.h Contains classes to write a quant/detection CHP file. */ #include "calvin_files/data/src/CHPQuantificationDetectionData.h" #include "calvin_files/data/src/ProbeSetQuantificationDetectionData.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // namespace affymetrix_calvin_io { /*! A class to write a quant/detection CHP file. */ class CHPQuantificationDetectionFileWriter { private: /*! The file writer. */ GenericFileWriter* writer; /*! The data set writer. */ DataSetWriter* dataSetWriter; /*! The file position of the entry for each data set. */ int32_t entryPos; /*! The maximum probe set name length. */ int32_t maxProbeSetName; public: /*! Constructor * @param p Pointer to the quant/detection CHP data object. */ CHPQuantificationDetectionFileWriter(CHPQuantificationDetectionData& p); /*! Destructor */ ~CHPQuantificationDetectionFileWriter(); /*! Write an entry. * @param p A pointer to a quant/detection entry. */ void WriteEntry(const affymetrix_calvin_data::ProbeSetQuantificationDetectionData & p); /*! Seeks to the data set. */ void SeekToDataSet(); private: /*! Writes the headers to the file. */ void WriteHeaders(); /*! Sets the file positions. * @return The current file position. 
*/ int32_t SetFilePositions(); }; } #endif // _CHPQuantificationDetectionFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationFileBufferWriter.cpp0000644000175200017520000000577614516003651033274 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file CalvinCHPQuantificationFileBufferWriter.cpp * @author David Le * @date Mon May 15 12:09:42 2006 * * @brief Class for writing quantifications to a buffer before writing to CHP files. 
*/ #include "calvin_files/writers/src/CalvinCHPQuantificationFileBufferWriter.h" // #include "calvin_files/writers/src/CalvinCHPFileUpdater.h" // using namespace affymetrix_calvin_io; CHPQuantificationFileBufferWriter::CHPQuantificationFileBufferWriter() { m_BufferSize = 0; m_MaxBufferSize = MAX_BUFFER_SIZE; } CHPQuantificationFileBufferWriter::~CHPQuantificationFileBufferWriter() { FlushBuffer(); Cleanup(); } void CHPQuantificationFileBufferWriter::Cleanup() { for (int target=0; target *CHPFileNames) { m_CHPFileNames = CHPFileNames; Cleanup(); for (int i=0; isize(); i++) { std::vector quantificationBuffer; m_TargetQuantificationBuffers.push_back(quantificationBuffer); m_TargetQuantificationRowIndexes.push_back(0); } m_BufferSize = 0; } void CHPQuantificationFileBufferWriter::WriteQuantificationEntry(int target, float quantification) { m_TargetQuantificationBuffers[target].push_back(quantification); m_BufferSize += sizeof(float); if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CHPQuantificationFileBufferWriter::FlushBuffer() { if(m_BufferSize > 0) { for (int target=0; targetsize(); target++) { CalvinCHPFileUpdater updater; updater.OpenCHPFile((*m_CHPFileNames)[target].c_str()); updater.UpdateExpressionQuantificationBuffer(m_TargetQuantificationRowIndexes[target], m_TargetQuantificationBuffers[target]); updater.CloseCHPFile(); m_TargetQuantificationRowIndexes[target] += m_TargetQuantificationBuffers[target].size(); m_TargetQuantificationBuffers[target].clear(); } } m_BufferSize = 0; } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationFileBufferWriter.h0000644000175200017520000000514514516003651032727 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file CalvinCHPQuantificationFileBufferWriter.h * @author David Le * @date Mon May 15 12:09:42 2006 * * @brief Class for writing Quantifications to a buffer before writing to CHP files. */ #ifndef _CHPQUANTIFICATIONFILEBUFFERWRITER_HEADER_ #define _CHPQUANTIFICATIONFILEBUFFERWRITER_HEADER_ #include #include #include // #define MAX_BUFFER_SIZE 5242880 // 5 MB namespace affymetrix_calvin_io { class CHPQuantificationFileBufferWriter { public: /*! Constructor */ CHPQuantificationFileBufferWriter(); /*! Destructor */ ~CHPQuantificationFileBufferWriter(); /*! Set maximum buffer size */ void SetMaxBufferSize(int MaxBufferSize) { m_MaxBufferSize = MaxBufferSize; } /*! Cleans up memory */ void Cleanup(); /*! Initialize quantification buffer writer * @param CHPFileNames Reference to a list of CHP file names. */ void Initialize(std::vector *CHPFileNames); /*! Write a quantification entry to buffer. If the buffer is full, flush it. * @param target Target for the quantification entry. * @param quantification Value for the quantification entry. */ void WriteQuantificationEntry(int target, float quantification); /*! Write the content of the buffer to CHP files. */ void FlushBuffer(); private: // Pointer to list of CHP file names. std::vector *m_CHPFileNames; // List of targets used for storing quantification entries. std::vector< std::vector > m_TargetQuantificationBuffers; // Buffer for storing quantification row indexes. 
std::vector m_TargetQuantificationRowIndexes; // Size of the current buffer in bytes. int m_BufferSize; // Maximum size of the buffer before it gets flushed int m_MaxBufferSize; }; } #endif // _CHPQUANTIFICATIONFILEBUFFERWRITER_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationFileUpdater.cpp0000644000175200017520000000320714516003651032255 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPQuantificationFileUpdater.h" // using namespace affymetrix_calvin_io; using namespace std; /*! The data set index. */ #define QUANTIFICATION_DATA_SET 0 /*! The data group index. */ #define QUANTIFICATION_DATA_GROUP 0 /*! The quantification column. */ #define QUANTIFICATION_COLUMN 1 /* * Initialize any needed members. */ CalvinCHPQuantificationFileUpdater::CalvinCHPQuantificationFileUpdater() : DataSetUpdater() { } /* * Clean up. */ CalvinCHPQuantificationFileUpdater::~CalvinCHPQuantificationFileUpdater() { } /* * Update the value for the given row. 
*/ void CalvinCHPQuantificationFileUpdater::UpdateQuantification(int row, float quantification) { Update(QUANTIFICATION_DATA_GROUP, QUANTIFICATION_DATA_SET, row, QUANTIFICATION_COLUMN, quantification); } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationFileUpdater.h0000644000175200017520000000352314516003651031723 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file CalvinCHPQuantificationFileUpdater.h Provides interfaces to update data in a "Calvin" binary "quantification" data file. */ #ifndef _CalvinCHPQuantificationFileUpdater_HEADER_ #define _CalvinCHPQuantificationFileUpdater_HEADER_ #include "calvin_files/writers/src/DataSetUpdater.h" // namespace affymetrix_calvin_io { /*! Provides interfaces to update data in a "Calvin" binary "quantification" data file. * This class assumes that the file has been completely written and that * only existing data groups/sets/rows/cols are being modified. */ class CalvinCHPQuantificationFileUpdater : public DataSetUpdater { public: /*! Constructor */ CalvinCHPQuantificationFileUpdater(); /*! Destructor */ ~CalvinCHPQuantificationFileUpdater(); /*! 
Update the quantification value for the given row * @param row The row index. * @param quantification The new quantification value. */ void UpdateQuantification(int row, float quantification); }; } #endif // _CalvinCHPQuantificationFileUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationFileWriter.cpp0000644000175200017520000000474514516003651032135 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPQuantificationFileWriter.h" // #include "calvin_files/data/src/CHPQuantificationData.h" // using namespace affymetrix_calvin_io; CHPQuantificationFileWriter::CHPQuantificationFileWriter(CHPQuantificationData &p) { maxProbeSetName = p.GetMaxProbeSetName(); writer = new GenericFileWriter(p.GetFileHeader()); WriteHeaders(); } CHPQuantificationFileWriter::~CHPQuantificationFileWriter() { delete writer; } void CHPQuantificationFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriter &dataGroupWriter = writer->GetDataGroupWriter(0); dataGroupWriter.WriteHeader(); //int iSet=0; DataSetWriterIt beginSet; DataSetWriterIt endSet; dataGroupWriter.GetDataSetWriters(beginSet, endSet); if 
(beginSet != endSet) { dataSetWriter = &(*beginSet); beginSet->WriteHeader(); entryPos = SetFilePositions(); dataGroupWriter.UpdateNextDataGroupPos(); } } void CHPQuantificationFileWriter::SeekToDataSet() { DataGroupWriter &dataGroupWriter = writer->GetDataGroupWriter(0); dataSetWriter = &dataGroupWriter.GetDataSetWriter(0); writer->SeekFromBeginPos(entryPos); } void CHPQuantificationFileWriter::WriteEntry(const affymetrix_calvin_data::ProbeSetQuantificationData& p) { if (maxProbeSetName == -1) dataSetWriter->Write(p.id); else dataSetWriter->Write(p.name, maxProbeSetName); dataSetWriter->Write(p.quantification); } int32_t CHPQuantificationFileWriter::SetFilePositions() { int32_t dataSetSz = dataSetWriter->GetDataSetSize(); int32_t offset = writer->GetFilePos(); writer->SeekFromCurrentPos(dataSetSz + 1); dataSetWriter->UpdateNextDataSetOffset(); return offset; } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPQuantificationFileWriter.h0000644000175200017520000000431114516003651031567 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPQuantificationFileWriter_HEADER_ #define _CHPQuantificationFileWriter_HEADER_ /*! 
\file CalvinCHPQuantificationFileWriter.h Contains classes to write a quant CHP file. */ #include "calvin_files/data/src/CHPQuantificationData.h" #include "calvin_files/data/src/ProbeSetQuantificationData.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // namespace affymetrix_calvin_io { /*! A class to write a quant CHP file. */ class CHPQuantificationFileWriter { private: /*! The file writer. */ GenericFileWriter* writer; /*! The data set writer. */ DataSetWriter* dataSetWriter; /*! The file position of the entry for each data set. */ int32_t entryPos; /*! The maximum probe set name length. */ int32_t maxProbeSetName; public: /*! Constructor * @param p Pointer to the quant CHP data object. */ CHPQuantificationFileWriter(CHPQuantificationData& p); /*! Destructor */ ~CHPQuantificationFileWriter(); /*! Write an entry. * @param p A pointer to a quant entry. */ void WriteEntry(const affymetrix_calvin_data::ProbeSetQuantificationData & p); /*! Seeks to the data set. */ void SeekToDataSet(); private: /*! Writes the headers to the file. */ void WriteHeaders(); /*! Sets the file positions. * @return The current file position. */ int32_t SetFilePositions(); }; } #endif // _CHPQuantificationFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCHPTilingFileWriter.cpp0000644000175200017520000000455714516003651030406 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCHPTilingFileWriter.h" // #include "calvin_files/data/src/CHPTilingData.h" // using namespace affymetrix_calvin_io; CHPTilingFileWriter::CHPTilingFileWriter(CHPTilingData &p) { dataSetIndex= -1; entryPos = new int32_t[p.GetNumberSequences()]; writer = new GenericFileWriter(p.GetFileHeader()); WriteHeaders(); } CHPTilingFileWriter::~CHPTilingFileWriter() { delete [] entryPos; delete writer; } void CHPTilingFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriter &dataGroupWriter = writer->GetDataGroupWriter(0); dataGroupWriter.WriteHeader(); int iSet=0; DataSetWriterIt beginSet; DataSetWriterIt endSet; dataGroupWriter.GetDataSetWriters(beginSet, endSet); while(beginSet != endSet) { dataSetWriter = &(*beginSet); beginSet->WriteHeader(); entryPos[iSet++] = SetFilePositions(); beginSet++; } dataGroupWriter.UpdateNextDataGroupPos(); } void CHPTilingFileWriter::SeekToDataSet(int index) { DataGroupWriter &dataGroupWriter = writer->GetDataGroupWriter(0); dataSetWriter = &dataGroupWriter.GetDataSetWriter(index); writer->SeekFromBeginPos(entryPos[index]); dataSetIndex = index; } void CHPTilingFileWriter::WriteTilingEntry(const CHPTilingEntry& p) { dataSetWriter->Write(p.position); dataSetWriter->Write(p.value); } int32_t CHPTilingFileWriter::SetFilePositions() { int32_t dataSetSz = dataSetWriter->GetDataSetSize(); int32_t offset = writer->GetFilePos(); writer->SeekFromCurrentPos(dataSetSz + 1); dataSetWriter->UpdateNextDataSetOffset(); return offset; } affxparser/src/fusion/calvin_files/writers/src/CalvinCHPTilingFileWriter.h0000644000175200017520000000420114516003651030035 
0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CHPTilingFileWriter_HEADER_ #define _CHPTilingFileWriter_HEADER_ /*! \file CalvinCHPTilingFileWriter.h Contains classes to write a tiling CHP file. */ #include "calvin_files/data/src/CHPTilingData.h" #include "calvin_files/data/src/CHPTilingEntry.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // namespace affymetrix_calvin_io { /*! A class to write a tiling CHP file. */ class CHPTilingFileWriter { private: /*! The file writer. */ GenericFileWriter* writer; /*! The data set writer. */ DataSetWriter* dataSetWriter; /*! The file position of the entry for each data set. */ int32_t *entryPos; public: /*! Constructor * @param p Pointer to the tiling CHP data object. */ CHPTilingFileWriter(CHPTilingData& p); /*! Destructor */ ~CHPTilingFileWriter(); /*! Write a tiling entry. * @param p A pointer to a tiling entry. */ void WriteTilingEntry(const CHPTilingEntry& p); /*! Seeks to the data set. * @param index The data set index. */ void SeekToDataSet(int index); private: /*! The current data set index. */ int dataSetIndex; /*! Writes the headers to the file. */ void WriteHeaders(); /*! 
Sets the file positions. * @return The current file position. */ int32_t SetFilePositions(); }; } #endif // _CHPTilingFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CalvinCelFileWriter.cpp0000644000175200017520000000756014516003651027325 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CalvinCelFileWriter.h" // #include "calvin_files/data/src/CELData.h" // using namespace affymetrix_calvin_io; CelFileWriter::CelFileWriter(CelFileData &p) { p.SetVersion(CurrentCelFileVersion); writer = new GenericFileWriter(p.GetFileHeader()); intensityPos = 0; stdDevPos = 0; pixelPos = 0; outlierPos = 0; maskPos = 0; WriteHeaders(); } CelFileWriter::~CelFileWriter() { delete writer; } void CelFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriter* dataGroupWriter = &writer->GetDataGroupWriter(0); dataGroupWriter->WriteHeader(); DataSetWriterIt begin; DataSetWriterIt end; dataGroupWriter->GetDataSetWriters(begin, end); while(begin != end) { dataSetWriter = &(*begin); begin->WriteHeader(); SetFilePositions(); begin++; } } void CelFileWriter::WriteIntensities(const FloatVector& v) { 
writer->SeekFromBeginPos(intensityPos); FloatVectorConstIt begin = v.begin(); FloatVectorConstIt end = v.end(); while(begin != end) { dataSetWriter->Write(*begin); begin++; } intensityPos = writer->GetFilePos(); } void CelFileWriter::WriteStdDevs(const FloatVector& v) { writer->SeekFromBeginPos(stdDevPos); FloatVectorConstIt begin = v.begin(); FloatVectorConstIt end = v.end(); while(begin != end) { dataSetWriter->Write(*begin); begin++; } stdDevPos = writer->GetFilePos(); } void CelFileWriter::WritePixels(const Int16Vector &v) { writer->SeekFromBeginPos(pixelPos); Int16VectorConstIt begin = v.begin(); Int16VectorConstIt end = v.end(); while(begin != end) { dataSetWriter->Write(*begin); begin++; } pixelPos = writer->GetFilePos(); } void CelFileWriter::WriteOutlierCoords(const XYCoordVector &v) { writer->SeekFromBeginPos(outlierPos); XYCoordConstIt begin = v.begin(); XYCoordConstIt end = v.end(); while(begin != end) { dataSetWriter->Write(begin->xCoord); dataSetWriter->Write(begin->yCoord); begin++; } outlierPos = writer->GetFilePos(); } void CelFileWriter::WriteMaskCoords(const XYCoordVector &v) { writer->SeekFromBeginPos(maskPos); XYCoordConstIt begin = v.begin(); XYCoordConstIt end = v.end(); while(begin != end) { dataSetWriter->Write(begin->xCoord); dataSetWriter->Write(begin->yCoord); begin++; } maskPos = writer->GetFilePos(); } void CelFileWriter::SetFilePositions() { const std::wstring &name = dataSetWriter->GetDataSetName(); int32_t dataSetSz = dataSetWriter->GetDataSetSize(); if(name == CelIntensityLabel) { intensityPos = writer->GetFilePos(); intensityLimitPos = intensityPos + dataSetSz; } else if(name == CelStdDevLabel) { stdDevPos = writer->GetFilePos(); stdDevLimitPos = stdDevPos + dataSetSz; } else if(name == CelPixelLabel) { pixelPos = writer->GetFilePos(); pixelLimitPos = pixelPos + dataSetSz; } else if(name == CelOutlierLabel) { outlierPos = writer->GetFilePos(); outlierLimitPos = outlierPos + dataSetSz; } else { maskPos = writer->GetFilePos(); 
maskLimitPos = maskPos + dataSetSz; } writer->SeekFromCurrentPos(dataSetSz + 1); dataSetWriter->UpdateNextDataSetOffset(); } affxparser/src/fusion/calvin_files/writers/src/CalvinCelFileWriter.h0000644000175200017520000000360014516003651026761 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CelFileWriter_HEADER_ #define _CelFileWriter_HEADER_ #include "calvin_files/data/src/CELData.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { class CelFileWriter { private: GenericFileWriter* writer; DataSetWriter* dataSetWriter; int32_t intensityPos; int32_t stdDevPos; int32_t pixelPos; int32_t outlierPos; int32_t maskPos; int32_t intensityLimitPos; int32_t stdDevLimitPos; int32_t pixelLimitPos; int32_t outlierLimitPos; int32_t maskLimitPos; public: CelFileWriter(CelFileData &p); ~CelFileWriter(); void WriteIntensities(const FloatVector &v); void WriteStdDevs(const FloatVector &v); void WritePixels(const Int16Vector &v); void WriteOutlierCoords(const XYCoordVector &coords); void WriteMaskCoords(const XYCoordVector &coords); private: void WriteHeaders(); void SetFilePositions(); }; } #endif // _CelFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/CombinedDatConstants.h0000644000175200017520000000616514516003651027203 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CombinedDatConstants_HDR_ #define _CombinedDatConstants_HDR_ #include "calvin_files/parameter/src/AffymetrixParameterConsts.h" namespace affymetrix_calvin_io { /*! Defines the static attribute name for the probe array type of the physical array. 
*/ #define GRID_STATUS_PARAM_NAME L"affymetrix-grid-status" #define GRID_UPPER_LEFT_X L"affymetrix-grid-upper-left-x" #define GRID_UPPER_LEFT_Y L"affymetrix-grid-upper-left-y" #define GRID_UPPER_RIGHT_X L"affymetrix-grid-upper-right-x" #define GRID_UPPER_RIGHT_Y L"affymetrix-grid-upper-right-y" #define GRID_LOWER_LEFT_X L"affymetrix-grid-lower-left-x" #define GRID_LOWER_LEFT_Y L"affymetrix-grid-lower-left-y" #define GRID_LOWER_RIGHT_X L"affymetrix-grid-lower-right-x" #define GRID_LOWER_RIGHT_Y L"affymetrix-grid-lower-right-y" #define AFFY_WAVELENGTH L"affymetrix-channel-wavelength" #define AFFY_WAVELENGTH_NAME L"affymetrix-channel-wavelength-name" #define AFFY_FILTER_WAVELENGTH L"affymetrix-filter-wavelength" #define MULTI_CHANNEL "affymetrix-calvin-multi-scan-acquisition" /*! Defines the static attribute name for HTGlobal grid header */ #define HTGLOBAL_GRID_STATUS_PARAM_NAME L"GridStatus" #define HTGLOBAL_GRID_UPPER_LEFT_X L"Upper left x" #define HTGLOBAL_GRID_UPPER_LEFT_Y L"Upper left y" #define HTGLOBAL_GRID_UPPER_RIGHT_X L"Upper right x" #define HTGLOBAL_GRID_UPPER_RIGHT_Y L"Upper right y" #define HTGLOBAL_GRID_LOWER_LEFT_X L"Lower left x" #define HTGLOBAL_GRID_LOWER_LEFT_Y L"Lower left y" #define HTGLOBAL_GRID_LOWER_RIGHT_X L"Lower right x" #define HTGLOBAL_GRID_LOWER_RIGHT_Y L"Lower right y" const int NumGridValues = 8; // 4 corners * 2 co-ordinate = 8 static const std::wstring GridValueNames[NumGridValues] = { GRID_UPPER_LEFT_X, GRID_UPPER_LEFT_Y, GRID_UPPER_RIGHT_X, GRID_UPPER_RIGHT_Y, GRID_LOWER_RIGHT_X, GRID_LOWER_RIGHT_Y, GRID_LOWER_LEFT_X, GRID_LOWER_LEFT_Y }; static const std::wstring GridStatus = GRID_STATUS_PARAM_NAME; static const std::wstring HTGridValueNames[NumGridValues] = { HTGLOBAL_GRID_UPPER_LEFT_X, HTGLOBAL_GRID_UPPER_LEFT_Y, HTGLOBAL_GRID_UPPER_RIGHT_X, HTGLOBAL_GRID_UPPER_RIGHT_Y, HTGLOBAL_GRID_LOWER_RIGHT_X, HTGLOBAL_GRID_LOWER_RIGHT_Y, HTGLOBAL_GRID_LOWER_LEFT_X, HTGLOBAL_GRID_LOWER_LEFT_Y }; static const std::wstring HTGridStatus = 
HTGLOBAL_GRID_STATUS_PARAM_NAME; } #endif affxparser/src/fusion/calvin_files/writers/src/CopyNumberResultWriter.cpp0000644000175200017520000001553514516003651030150 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/CopyNumberResultWriter.h" // #include "calvin_files/data/src/CHPMultiDataData.h" #include "calvin_files/exception/src/ExceptionBase.h" #include "calvin_files/parameter/src/AffymetrixParameterConsts.h" #include "calvin_files/utils/src/StringUtils.h" #include "calvin_files/writers/src/CalvinCHPMultiDataFileWriter.h" // #include #include // using namespace std; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_data; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_exceptions; /* * Initialize the class members. */ void CopyNumberResultWriter::Init() { maxProbeSetNameLength.clear(); numberProbeSets = 0; numberCytoRegions = 0; numberGenotypeProbeSets = 0; bufferWriter = NULL; } /* * Clear the buffer. 
*/ void CopyNumberResultWriter::Clear() { maxProbeSetNameLength.clear(); numberProbeSets = 0; numberCytoRegions = 0; numberGenotypeProbeSets = 0; algName.clear(); algVersion.clear(); programName.clear(); programVersion.clear(); programCompany.clear(); columns.clear(); genotypeColumns.clear(); cytoRegionColumns.clear(); algParams.clear(); summaryParams.clear(); chrStartStop.clear(); outputFiles.clear(); if (bufferWriter != NULL) delete bufferWriter; bufferWriter = NULL; } /* * Create a results file with the CEL file header and other parameters. */ void CopyNumberResultWriter::CreateResultFile(affymetrix_fusion_io::FusionCELData& cel, const std::string& fileName) { try { // Create the results file with the header. CHPMultiDataData *data = new CHPMultiDataData(fileName); data->SetEntryCount(CopyNumberMultiDataType, numberProbeSets, maxProbeSetNameLength[CopyNumberMultiDataType], columns); if (numberCytoRegions > 0) data->SetEntryCount(CytoMultiDataType, numberCytoRegions, maxProbeSetNameLength[CytoMultiDataType],cytoRegionColumns); if (numberGenotypeProbeSets > 0) data->SetEntryCount(GenotypeMultiDataType, numberGenotypeProbeSets, maxProbeSetNameLength[GenotypeMultiDataType], genotypeColumns); data->SetAlgName(StringUtils::ConvertMBSToWCS(algName)); data->SetAlgVersion(StringUtils::ConvertMBSToWCS(algVersion)); data->SetArrayType(cel.GetChipType()); GenericDataHeader *gdh = data->GetFileHeader()->GetGenericDataHdr(); ParameterNameValueType param; param.SetName(PROGRAM_NAME); param.SetValueText(StringUtils::ConvertMBSToWCS(programName)); gdh->AddNameValParam(param); param.SetName(L"program-version"); param.SetValueText(StringUtils::ConvertMBSToWCS(programVersion)); gdh->AddNameValParam(param); param.SetName(PROGRAM_COMPANY); param.SetValueText(StringUtils::ConvertMBSToWCS(programCompany)); gdh->AddNameValParam(param); ParameterNameValueTypeList params = algParams; param.SetName(L"ArraySet"); param.SetValueText(cel.GetChipType()); params.push_back(param); 
data->AddAlgParams(params); data->AddSummaryParams(summaryParams); DataSetHeader *dsh = data->GetDataSetHeader(CopyNumberMultiDataType); for (ParameterNameValueTypeList::iterator it=chrStartStop.begin(); it!=chrStartStop.end(); it++) dsh->AddNameValParam(*it); GenericData *gdata = cel.GetGenericData(); if (gdata != NULL) gdh->AddParent(*gdata->Header().GetGenericDataHdr()); CHPMultiDataFileWriter *writer = new CHPMultiDataFileWriter(*data); delete writer; delete data; // Create a buffer writer object outputFiles.clear(); outputFiles.push_back(fileName); vector dataTypes; dataTypes.push_back(CopyNumberMultiDataType); if (numberCytoRegions > 0) dataTypes.push_back(CytoMultiDataType); if (numberGenotypeProbeSets > 0) dataTypes.push_back(GenotypeMultiDataType); bufferWriter = new CHPMultiDataFileBufferWriter(); bufferWriter->Initialize(&outputFiles, dataTypes, maxProbeSetNameLength); } catch (CalvinException &ex) { string err = "Error creating the output file: " + fileName; wstring msg = ex.ToString(); if (msg.empty() == false) err += " " + StringUtils::ConvertWCSToMBS(msg); throw err; } catch (...) { string err = "Error creating the output file: " + fileName; throw err; } } /* * Flush the buffer and close the file. */ void CopyNumberResultWriter::CloseResultsFile() { if (bufferWriter != NULL) { bufferWriter->FlushBuffer(); delete bufferWriter; } bufferWriter = NULL; } /* * Write the entry to the buffer. */ void CopyNumberResultWriter::WriteProbeSetResult(const ProbeSetMultiDataCopyNumberData& entry) { bufferWriter->WriteMultiDataCopyNumberEntry(CopyNumberMultiDataType, 0, entry); } /* * Write the entry to the buffer. */ void CopyNumberResultWriter::WriteGenotypeProbeSetResult(const ProbeSetMultiDataGenotypeData& entry) { bufferWriter->WriteMultiDataGenotypeEntry(GenotypeMultiDataType, 0, entry); } /* * Write the entry to the buffer. 
*/ void CopyNumberResultWriter::WriteCytoRegionResult(const ProbeSetMultiDataCytoRegionData& entry) { bufferWriter->WriteMultiDataCytoRegionEntry(CytoMultiDataType, 0, entry); } /* * Store the start index and count of probe sets for the given chromosome. */ void CopyNumberResultWriter::SetChromosomeProbeSetIndexInformation(u_int8_t chr, int startIndex, int count) { ostringstream str; str << (int) chr; wstring schr = StringUtils::ConvertMBSToWCS(str.str()); ParameterNameValueType param; param.SetName(schr + L":start"); param.SetValueInt32(startIndex); chrStartStop.push_back(param); param.SetName(schr + L":count"); param.SetValueInt32(count); chrStartStop.push_back(param); param.SetName(schr + L":display"); param.SetValueAscii(ChromosomeToString(chr)); chrStartStop.push_back(param); } affxparser/src/fusion/calvin_files/writers/src/CopyNumberResultWriter.h0000644000175200017520000001613514516003651027612 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CopyNumberResultWriter_HEADER_ #define _CopyNumberResultWriter_HEADER_ /*! 
\file CopyNumberResultWriter.h Defines a class that wraps the writing of a single copy number file in the Command Console format.
 */

#include "calvin_files/data/src/ColumnInfo.h"
#include "calvin_files/data/src/ProbeSetMultiDataData.h"
#include "calvin_files/fusion/src/FusionCELData.h"
#include "calvin_files/parameter/src/ParameterNameValueType.h"
#include "calvin_files/writers/src/CalvinCHPMultiDataFileBufferWriter.h"
#include "calvin_files/writers/src/CalvinCHPMultiDataFileWriter.h"
//
// NOTE(review): the header names of the next four includes, and the template
// arguments of every std::map / std::vector in the class below, are missing
// in this copy of the file (text between angle brackets appears to have been
// stripped) -- restore them from the upstream source.
#include
#include
#include
#include
//

/*! This class wraps the writing of a single copy number file in the Command Console format.
 *
 * Typical use: set the counts, name lengths, columns and parameters through
 * the accessors, call CreateResultFile, add entries with the Write*Result
 * functions and finish with CloseResultsFile.
 *
 * NOTE(review): the class owns bufferWriter through a raw pointer but
 * declares no copy constructor or assignment operator, so copying an
 * instance would leave two objects holding the same pointer.
 */
class CopyNumberResultWriter
{
private:
    /*! The maximum name length for each data type (indexed by the multi data type values used below). */
    std::map maxProbeSetNameLength;

    /*! The number of probe sets */
    int numberProbeSets;

    /*! The number of cyto regions */
    int numberCytoRegions;

    /*! The number of genotype probe sets */
    int numberGenotypeProbeSets;

    /*! The name of the algorithm. */
    std::string algName;

    /*! The algorithm version. */
    std::string algVersion;

    /*! The program name. */
    std::string programName;

    /*! The program version. */
    std::string programVersion;

    /*! The company name. */
    std::string programCompany;

    /*! The columns of data to store. */
    std::vector columns;

    /*! The columns of genotype data to store. */
    std::vector genotypeColumns;

    /*! The columns of cytoregion data to store. */
    std::vector cytoRegionColumns;

    /*! Algorithm parameters. */
    affymetrix_calvin_parameter::ParameterNameValueTypeList algParams;

    /*! Summary parameters. */
    affymetrix_calvin_parameter::ParameterNameValueTypeList summaryParams;

    /*! A list of start/count/display parameters for the probe sets of each chromosome. */
    affymetrix_calvin_parameter::ParameterNameValueTypeList chrStartStop;

    /*! The writer for the results file (owned by this object; null when no file is open). */
    affymetrix_calvin_io::CHPMultiDataFileBufferWriter *bufferWriter;

    /*! Used by the buffer writing. The list of output files. */
    std::vector outputFiles;

    /*! Initialize the class */
    void Init();

public:
    /*! Construct an empty writer; the members are set up by Init(). */
    CopyNumberResultWriter() { Init(); }

    /*! Clean up by calling Clear(). */
    ~CopyNumberResultWriter() { Clear(); }

    /*! Reset every member to its empty state and release the buffer writer. */
    void Clear();

    /*! Set the max probe set name length.
     * @param len The maximum length.
     */
    void MaximumProbeSetNameLength(int len) { maxProbeSetNameLength[affymetrix_calvin_io::CopyNumberMultiDataType] = len; }

    /*! Set the max cyto region name length.
     * @param len The maximum length.
     */
    void MaximumCytoRegionNameLength(int len) { maxProbeSetNameLength[affymetrix_calvin_io::CytoMultiDataType] = len; }

    /*! Set the max genotype probe set name length.
     * @param len The maximum length.
     */
    void MaximumGenotypeProbeSetNameLength(int len) { maxProbeSetNameLength[affymetrix_calvin_io::GenotypeMultiDataType] = len; }

    /*! The name of the algorithm.
     * @return A modifiable reference to the algorithm name.
     */
    std::string& AlgName() { return algName; }

    /*! The algorithm version.
     * @return A modifiable reference to the version.
     */
    std::string& AlgVersion() { return algVersion; }

    /*! The program name (modifiable reference). */
    std::string& ProgramName() { return programName; }

    /*! The program version (modifiable reference). */
    std::string& ProgramVersion() { return programVersion; }

    /*! The company name (modifiable reference). */
    std::string& ProgramCompany() { return programCompany; }

    /*! The number of probe sets
     * @return A modifiable reference to the number of probe sets
     */
    int& NumberProbeSets() { return numberProbeSets; }

    /*! The number of cyto regions
     * @return A modifiable reference to the number of cyto regions
     */
    int& NumberCytoRegions() { return numberCytoRegions; }

    /*! The number of genotype probe sets
     * @return A modifiable reference to the number of genotype probe sets.
     */
    int& NumberGenotypeProbeSets() { return numberGenotypeProbeSets; }

    /*! The columns of data to store. */
    std::vector& Columns() { return columns; }

    /*! The columns of genotype data to store. */
    std::vector& GenotypeColumns() { return genotypeColumns; }

    /*! The columns of cyto data to store. */
    std::vector& CytoRegionColumns() { return cytoRegionColumns; }

    /*! Algorithm parameters. This should include the genome name, version, etc.
     */
    affymetrix_calvin_parameter::ParameterNameValueTypeList& AlgParams() { return algParams; }

    /*! Summary parameters. This includes items such as IQR, etc. */
    affymetrix_calvin_parameter::ParameterNameValueTypeList& SummaryParams() { return summaryParams; }

    /*! Store the starting probe set index and count of probe sets for the given chromosome.
     * @param chr The chromosome value
     * @param startIndex The index of the probe sets that the given chromosome starts at.
     * @param count The number of probe sets for the given chromosome.
     */
    void SetChromosomeProbeSetIndexInformation(u_int8_t chr, int startIndex, int count);

    /*! Create the results file with the header information (no data is written). This function
     * will throw a std::string exception on errors; it does not return a value.
     * @param cel The CEL file data object.
     * @param fileName The name of the results file to create.
     */
    void CreateResultFile(affymetrix_fusion_io::FusionCELData& cel, const std::string& fileName);

    /*! Write the probe set result to the file.
     * @param entry The results.
     */
    void WriteProbeSetResult(const affymetrix_calvin_data::ProbeSetMultiDataCopyNumberData& entry);

    /*! Write the cyto region result to the file.
     * @param entry The results.
     */
    void WriteCytoRegionResult(const affymetrix_calvin_data::ProbeSetMultiDataCytoRegionData& entry);

    /*! Write the genotype probe set result to the file.
     * @param entry The results.
     */
    void WriteGenotypeProbeSetResult(const affymetrix_calvin_data::ProbeSetMultiDataGenotypeData& entry);

    /*! Flush pending entries and close the output file. */
    void CloseResultsFile();
};

#endif
affxparser/src/fusion/calvin_files/writers/src/DATFileUpdater.cpp0000644000175200017520000001414114516003651026216 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DATFileUpdater.h" // using namespace affymetrix_calvin_io; // Constant column names. static const std::wstring GridStatusColName = L"GridStatus"; static const int32_t NumGridCols = 8; static const std::wstring GridColNames[NumGridCols] = {L"Upper left x", L"Upper left y", L"Upper right x", L"Upper right y", L"Lower right x", L"Lower right y", L"Lower left x", L"Lower left y" }; DATFileUpdater::DATFileUpdater(DATData &p) { updateData = &p; currentData = new DATData(); ReadCurrentData(); writer = new GenericFileWriter(currentData->GetFileHeader(), false); } DATFileUpdater::~DATFileUpdater() { delete currentData; delete writer; } void DATFileUpdater::Update() { // determine whether to add grid or subgrid info (or both) UpdateFileHeader(); if(updateData->HasGlobalGridData()) { GenericData* gData = ¤tData->GetGenericData(); u_int32_t gridDataSetOffset = GetGridDataSetOffset(gData->Header()); DataSetHeader gridHdr = CreateGridDataSetHeader(); WriteGridData(gridHdr, gridDataSetOffset); if(updateData->GetSubgridCnt() > 0) { u_int32_t subGridDataSetOffset = gridHdr.GetNextSetFilePos(); DataSetHeader subGridHdr = CreateSubGridDataSetHeader(); WriteSubGridData(subGridHdr, subGridDataSetOffset); 
UpdateDataSetCnt(gData->Header(), 4); } else { UpdateDataSetCnt(gData->Header(), 3); } } else if(updateData->GetSubgridCnt() > 0) { GenericData* gData = ¤tData->GetGenericData(); u_int32_t subGridDataSetOffset = GetSubGridDataSetOffset(gData->Header()); if(subGridDataSetOffset > 0) { DataSetHeader subGridHdr = CreateSubGridDataSetHeader(); WriteSubGridData(subGridHdr, subGridDataSetOffset); UpdateDataSetCnt(gData->Header(), 4); } else { // no global grid information was found in the file // global grid information has to be in the DATData object or in the file } } } void DATFileUpdater::UpdateDataSetCnt(FileHeader fh, u_int32_t cnt) const { u_int32_t dataGrpOffset = fh.GetFirstDataGroupFilePos(); writer->SeekFromBeginPos(dataGrpOffset + 8); writer->Write(cnt); } u_int32_t DATFileUpdater::GetGridDataSetOffset(FileHeader fh) const { u_int32_t result = 0; if(fh.GetDataGroupCnt() > 0) { DataGroupHeader grpHdr = fh.GetDataGroup(0); if(grpHdr.GetDataSetCnt() > 1) { DataSetHeader dsHdr = grpHdr.GetDataSet(1); result = dsHdr.GetNextSetFilePos(); } } return result; } u_int32_t DATFileUpdater::GetSubGridDataSetOffset(FileHeader fh) const { u_int32_t result = 0; if(fh.GetDataGroupCnt() > 0) { DataGroupHeader grpHdr = fh.GetDataGroup(0); if(grpHdr.GetDataSetCnt() > 2) { DataSetHeader dsHdr = grpHdr.GetDataSet(2); result = dsHdr.GetNextSetFilePos(); } } return result; } void DATFileUpdater::WriteGridData(DataSetHeader& gridHdr, u_int32_t offset) const { writer->SeekFromBeginPos(offset); DataSetWriter* dsWriter = writer->GetDataGroupWriter(0).CreateDataSetWriter(gridHdr); dsWriter->WriteHeader(); dsWriter->Write(updateData->GetGlobalGridStatus()); FPointVector points = updateData->GetGlobalGrid().pts; for(u_int32_t i = 0; i < points.size(); i++) { dsWriter->Write(points[i].x); dsWriter->Write(points[i].y); } dsWriter->UpdateNextDataSetOffset(); delete dsWriter; } void DATFileUpdater::WriteSubGridData(DataSetHeader& subGridHdr, u_int32_t offset) const { 
writer->SeekFromBeginPos(offset); DataSetWriter* dsWriter = writer->GetDataGroupWriter(0).CreateDataSetWriter(subGridHdr); dsWriter->WriteHeader(); u_int32_t cnt = updateData->GetSubgridCnt(); for(u_int32_t n = 0; n < cnt; n++) { dsWriter->Write(updateData->GetSubgridStatus(n)); FPointVector points = updateData->GetSubgrid(n).pts; for(u_int32_t i = 0; i < points.size(); i++) { dsWriter->Write(points[i].x); dsWriter->Write(points[i].y); } } dsWriter->UpdateNextDataSetOffset(); delete dsWriter; } DataSetHeader DATFileUpdater::CreateGridDataSetHeader() const { DataSetHeader result; AddGridDataSetHeaderParameters(result); result.SetRowCnt(1); result.SetName(DAT_GLOBAL_GRID); result.AddUIntColumn(GridStatusColName); for(int i = 0; i < NumGridCols; i++) { result.AddFloatColumn(GridColNames[i]); } return result; } DataSetHeader DATFileUpdater::CreateSubGridDataSetHeader() const { DataSetHeader result; AddGridDataSetHeaderParameters(result); result.SetRowCnt(updateData->GetSubgridCnt()); result.SetName(DAT_SUBGRID); result.AddUIntColumn(GridStatusColName); for(int i = 0; i < NumGridCols; i++) { result.AddFloatColumn(GridColNames[i]); } return result; } void DATFileUpdater::ReadCurrentData() const { DATFileReader reader; currentData->SetFilename(updateData->GetFilename()); reader.Read(*currentData); } void DATFileUpdater::UpdateFileHeader() { // Set the updateData fileId to a new value. 
updateData->GetFileHeader()->GetGenericDataHdr()->SetFileId(AffymetrixGuid::GenerateNewGuid()); // Update the FileHeader FileHeaderUpdater updater; updater.Update( writer->GetFileOStream(), *updateData->GetFileHeader(), *currentData->GetFileHeader()); } void DATFileUpdater::AddGridDataSetHeaderParameters(DataSetHeader& hdr) const { ParameterNameValueTypeVector params; updateData->GetGridAlignmentAlgorithmParameters(params); for (ParameterNameValueTypeConstIt ii = params.begin(); ii != params.end(); ++ii) { hdr.AddNameValParam(*ii); } } affxparser/src/fusion/calvin_files/writers/src/DATFileUpdater.h0000644000175200017520000000410614516003651025663 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DATFileUpdater_HEADER_ #define _DATFileUpdater_HEADER_ #include "calvin_files/data/src/DATData.h" #include "calvin_files/parsers/src/DATFileReader.h" #include "calvin_files/writers/src/FileHeaderUpdater.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif

namespace affymetrix_calvin_io
{

/*! Updates an existing DAT file in place with the global grid and subgrid
 *  information held by a DATData object.
 *
 *  NOTE(review): the class holds raw pointers that its destructor deletes but
 *  declares no copy constructor or assignment operator; instances should not
 *  be copied.
 */
class DATFileUpdater
{

private:
    /*! The caller's data carrying the new grid information (not owned). */
    DATData* updateData;
    /*! The data read from the file being updated (owned). */
    DATData* currentData;
    /*! Writer opened on the existing file (owned). */
    GenericFileWriter* writer;
    /*! NOTE(review): appears unused by the implementation -- confirm before removing. */
    DataSetWriter* dataSetWriter;

public:

    /*! Reads the file named by p and opens it for update.
     * @param p The data holding the file name and the new grid information.
     */
    DATFileUpdater(DATData &p);
    /*! Releases the writer and the data read from the file. */
    ~DATFileUpdater();

    /*! Rewrites the file header and writes the grid and/or subgrid data sets. */
    void Update();

private:

    /*! Writes the global grid data set at the given file offset. */
    void WriteGridData(DataSetHeader& gridHdr, u_int32_t offset) const;
    /*! Writes the subgrid data set at the given file offset. */
    void WriteSubGridData(DataSetHeader& SubGridHdr, u_int32_t offset) const;
    /*! File position for the global grid data set, 0 if it cannot be determined. */
    u_int32_t GetGridDataSetOffset(FileHeader fh) const;
    /*! File position for the subgrid data set, 0 if it cannot be determined. */
    u_int32_t GetSubGridDataSetOffset(FileHeader fh) const;
    /*! Builds the header of the global grid data set. */
    DataSetHeader CreateGridDataSetHeader() const;
    /*! Builds the header of the subgrid data set. */
    DataSetHeader CreateSubGridDataSetHeader() const;
    /*! Overwrites the data set count of the first data group. */
    void UpdateDataSetCnt(FileHeader fh, u_int32_t cnt) const;
    /*! Reads the file named by updateData into currentData. */
    void ReadCurrentData() const;
    /*! Assigns a new file id and rewrites the file header. */
    void UpdateFileHeader();
    /*! Copies the grid alignment algorithm parameters into hdr. */
    void AddGridDataSetHeaderParameters(DataSetHeader& hdr) const;
};

}

#endif // _DATFileUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/DATFileWriter.cpp0000644000175200017520000000500014516003651026060 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DATFileWriter.h" // #include "calvin_files/data/src/DATData.h" // using namespace affymetrix_calvin_io; DATFileWriter::DATFileWriter(DATData &p) { writer = new GenericFileWriter(p.GetFileHeader()); pixelPos = 0; statsPos = 0; WriteHeaders(); } DATFileWriter::~DATFileWriter() { delete writer; } void DATFileWriter::WriteHeaders() { writer->WriteHeader(); DataGroupWriter* dataGroupWriter = &writer->GetDataGroupWriter(0); dataGroupWriter->WriteHeader(); DataSetWriterIt begin; DataSetWriterIt end; dataGroupWriter->GetDataSetWriters(begin, end); while(begin != end) { dataSetWriter = &(*begin); begin->WriteHeader(); SetFilePositions(); begin++; } } void DATFileWriter::WriteStats(const Uint16Vector &v) { writer->SeekFromBeginPos(statsPos); Uint16VectorConstIt begin = v.begin(); Uint16VectorConstIt end = v.end(); while(begin != end) { dataSetWriter->Write(*begin); begin++; } statsPos = writer->GetFilePos(); } void DATFileWriter::WritePixels(const Uint16Vector &v) { writer->SeekFromBeginPos(pixelPos); Uint16VectorConstIt begin = v.begin(); Uint16VectorConstIt end = v.end(); while(begin != end) { dataSetWriter->Write(*begin); begin++; } pixelPos = writer->GetFilePos(); } void DATFileWriter::SetFilePositions() { const std::wstring &name = dataSetWriter->GetDataSetName(); int32_t dataSetSz = dataSetWriter->GetDataSetSize(); if(name == DAT_PIXEL) { pixelPos = writer->GetFilePos(); pixelLimitPos = pixelPos + dataSetSz; } else { statsPos = writer->GetFilePos(); statsLimitPos = statsPos + dataSetSz; } writer->SeekFromCurrentPos(dataSetSz + 1); dataSetWriter->UpdateNextDataSetOffset(); } 
affxparser/src/fusion/calvin_files/writers/src/DATFileWriter.h0000644000175200017520000000314214516003651025532 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DATFileWriter_HEADER_ #define _DATFileWriter_HEADER_ #include "calvin_files/data/src/DATData.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { class DATFileWriter { private: GenericFileWriter* writer; DataSetWriter* dataSetWriter; int32_t pixelPos; int32_t statsPos; int32_t pixelLimitPos; int32_t statsLimitPos; public: DATFileWriter(DATData &p); ~DATFileWriter(); virtual void WriteStats(const Uint16Vector &v); virtual void WritePixels(const Uint16Vector &v); private: void WriteHeaders(); void SetFilePositions(); }; } #endif // _DATFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/DataGroupHeaderUpdater.cpp0000644000175200017520000001207414516003651030010 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataGroupHeaderUpdater.h" // #include "calvin_files/writers/src/FileOutput.h" // using namespace std; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; #define WCHAR_T_SIZE 2 /* * Constructor */ DataGroupHeaderUpdater::DataGroupHeaderUpdater(std::ofstream& fileStream, DataGroupHeader &hdr) { os = &fileStream; grpHdr = &hdr; } /* * Destructor */ DataGroupHeaderUpdater::~DataGroupHeaderUpdater() { } void DataGroupHeaderUpdater::SeekNextDataGrpPosition() { u_int32_t pos = grpHdr->GetHeaderStartFilePos(); os->seekp(pos, std::ios::beg); } void DataGroupHeaderUpdater::SeekLeadDataSetPosition() { u_int32_t pos = grpHdr->GetHeaderStartFilePos(); pos += sizeof(u_int32_t); // next data group position os->seekp(pos, std::ios::beg); } void DataGroupHeaderUpdater::SeekDataSetCntPosition() { u_int32_t pos = grpHdr->GetHeaderStartFilePos(); pos += sizeof(u_int32_t); // next data group position pos += sizeof(u_int32_t); // lead data set position os->seekp(pos, std::ios::beg); } void DataGroupHeaderUpdater::UpdateNextDataGroupOffset(u_int32_t offset) { u_int32_t pos = os->tellp(); SeekNextDataGrpPosition(); FileOutput::WriteUInt32(*os, offset); os->seekp(pos, std::ios::beg); } void 
DataGroupHeaderUpdater::UpdateLeadDataSetOffset(u_int32_t offset) { u_int32_t pos = os->tellp(); SeekLeadDataSetPosition(); FileOutput::WriteUInt32(*os, offset); os->seekp(pos, std::ios::beg); } void DataGroupHeaderUpdater::UpdateDataSetCount(u_int32_t count) { u_int32_t pos = os->tellp(); SeekDataSetCntPosition(); FileOutput::WriteUInt32(*os, count); os->seekp(pos, std::ios::beg); } void DataGroupHeaderUpdater::AppendDataSetHeader(DataSetHeader& setHdr) { os->seekp(0, std::ios::end); int sz = grpHdr->GetDataSetCnt(); u_int32_t setPos = os->tellp(); setHdr.SetHeaderStartFilePos(setPos); if(sz > 0) { //update previous data set with new position DataSetHeader lastHdr = grpHdr->GetDataSet(sz - 1); DataSetHeaderUpdater lastSetUpdater(*os, lastHdr); lastSetUpdater.UpdateNextDataSetPosition(setPos); } else { UpdateLeadDataSetOffset(setPos); } //increment the number of data sets in the group UpdateDataSetCount(sz + 1); DataSetWriter setWriter(os, &setHdr); setWriter.WriteHeader(); setHdr.SetDataStartFilePos(os->tellp()); os->seekp(setWriter.GetDataSetSize(), std::ios::cur); setWriter.UpdateNextDataSetOffset(); setHdr.SetNextSetFilePos(os->tellp()); grpHdr->AddDataSetHdr(setHdr); } void DataGroupHeaderUpdater::ReplaceDataSetHeader(std::wstring targetName, DataSetHeader& newHdr) { os->seekp(0, std::ios::end); u_int32_t setPos = os->tellp(); u_int32_t nextSetPos = 0; newHdr.SetHeaderStartFilePos(setPos); DataSetHdrIt prev, current, end; grpHdr->GetDataSetIterators(current,end); prev = current; if(current == end) { //no sets. we're done return; } current++; if(prev->GetName() == targetName) { //first set is the target UpdateLeadDataSetOffset(setPos); if(current != end) { //save next set offset nextSetPos = current->GetHeaderStartFilePos(); } } else { while(current != end) { if(current->GetName() == targetName) { //OPTIMIZATION: if the new data set size is <= the size of the target set then //we should add it in place of the target and NOT orphan the target set. 
//NOTE: it is not easy to calculate the new data set size. ParamaterNameValueType //should be modified to return it's size. //int currentSz = (current->GetDataStartFilePos() - current->GetHeaderStartFilePos()) + current->GetDataSize(); //found the target DataSetHeaderUpdater prevSetUpdater(*os, *prev); prevSetUpdater.UpdateNextDataSetPosition(setPos); current++; if(current != end) { nextSetPos = current->GetHeaderStartFilePos(); } break; } else { prev = current; current++; } } } DataSetWriter setWriter(os, &newHdr); setWriter.WriteHeader(); newHdr.SetDataStartFilePos(os->tellp()); os->seekp(setWriter.GetDataSetSize(), std::ios::cur); if(nextSetPos > 0) { setWriter.UpdateNextDataSetOffset(nextSetPos); newHdr.SetNextSetFilePos(nextSetPos); } else { setWriter.UpdateNextDataSetOffset(); newHdr.SetNextSetFilePos(os->tellp()); } grpHdr->ReplaceDataSetHdr(newHdr); } affxparser/src/fusion/calvin_files/writers/src/DataGroupHeaderUpdater.h0000644000175200017520000000473314516003651027460 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataGroupHeaderUpdater_HEADER_ #define _DataGroupHeaderUpdater_HEADER_ /*! 
\file DataGroupHeaderUpdater.h This file defines a class that updates the data group header of an existing file.
 */

#include "calvin_files/data/src/DataGroupHeader.h"
#include "calvin_files/writers/src/DataSetHeaderUpdater.h"
#include "calvin_files/writers/src/DataSetWriter.h"
//
// NOTE(review): the header name of the next include is missing in this copy
// of the file -- restore it from the upstream source.
#include
//

#ifdef _MSC_VER
#pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations.
#endif

namespace affymetrix_calvin_io
{

/*! This class updates the data group header of an existing file with new information. */
class DataGroupHeaderUpdater
{
public:

    /*! Constructor
     * @param fileStream The open output stream of the file to update (not owned).
     * @param hdr The data group header to update (not owned).
     */
    DataGroupHeaderUpdater(std::ofstream& fileStream, DataGroupHeader &hdr);

    /*! Destructor */
    ~DataGroupHeaderUpdater();

    /*! Overwrite the next-data-group position stored in the group header.
     * @param offset The new file position.
     */
    void UpdateNextDataGroupOffset(u_int32_t offset);

    /*! Overwrite the position of the first data set stored in the group header.
     * @param offset The new file position.
     */
    void UpdateLeadDataSetOffset(u_int32_t offset);

    /*! Overwrite the data set count stored in the group header.
     * @param count The new number of data sets.
     */
    void UpdateDataSetCount(u_int32_t count);

    /*! Append a new data set in this data group.
     * @param setHdr The new data set that will be appended.
     */
    void AppendDataSetHeader(DataSetHeader& setHdr);

    /*! Replace a data set in this data group with a new one.
     * @param targetName The name of the data set to be replaced.
     * @param newHdr The new data set that will be written into the file.
     */
    void ReplaceDataSetHeader(std::wstring targetName, DataSetHeader& newHdr);

private:

    /*! Open output filestream */
    std::ofstream* os;

    /*! The data group header */
    DataGroupHeader* grpHdr;

    /*! Position the stream on the next-data-group field of the header. */
    void SeekNextDataGrpPosition();
    /*! Position the stream on the lead-data-set field of the header. */
    void SeekLeadDataSetPosition();
    /*! Position the stream on the data-set-count field of the header. */
    void SeekDataSetCntPosition();
};

}

#endif // _DataGroupHeaderUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/DataGroupHeaderWriter.cpp0000644000175200017520000000444514516003651027663 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataGroupHeaderWriter.h" // #include "calvin_files/writers/src/FileOutput.h" // using namespace affymetrix_calvin_io; DataGroupHeaderWriter::DataGroupHeaderWriter() { dataSetPos = 0; nextDataGroupPos = 0; } void DataGroupHeaderWriter::Write(std::ofstream &os, const DataGroupHeader &dc) { WriteNextDataGroupPos(os, 0); WriteDataSetPos(os, 0); WriteDataSetCnt(os, dc); WriteName(os, dc); } void DataGroupHeaderWriter::WriteDataSetPos(std::ofstream &os, u_int32_t pos) { dataSetPos = os.tellp(); FileOutput::WriteInt32(os, pos); } void DataGroupHeaderWriter::UpdateDataSetPos(std::ofstream &os, u_int32_t pos) const { if(dataSetPos > 0) { os.seekp(dataSetPos, std::ios::beg); FileOutput::WriteUInt32(os, pos); os.seekp(pos, std::ios::beg); } } void DataGroupHeaderWriter::WriteNextDataGroupPos(std::ofstream &os, u_int32_t pos) { nextDataGroupPos = os.tellp(); FileOutput::WriteInt32(os, pos); } void DataGroupHeaderWriter::UpdateNextDataGroupPos(std::ofstream &os, u_int32_t pos) const { if(nextDataGroupPos > 0) { os.seekp(nextDataGroupPos, std::ios::beg); FileOutput::WriteUInt32(os, pos); os.seekp(pos, std::ios::beg); } } void DataGroupHeaderWriter::WriteName(std::ofstream &os, const DataGroupHeader &dc) const { FileOutput::WriteString16(os, dc.GetName()); } void DataGroupHeaderWriter::WriteDataSetCnt(std::ofstream &os, const DataGroupHeader &dc) const { FileOutput::WriteInt32(os, dc.GetDataSetCnt()); } 
affxparser/src/fusion/calvin_files/writers/src/DataGroupHeaderWriter.h0000644000175200017520000000352714516003651027330 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataGroupHeaderWriter_HEADER_ #define _DataGroupHeaderWriter_HEADER_ #include "calvin_files/data/src/DataGroupHeader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif

namespace affymetrix_calvin_io
{

/*! Writes a data group header to an output file stream and allows the two
 *  file-position fields it contains to be patched after the fact.
 */
class DataGroupHeaderWriter
{

private:

	/*! Stream position at which the data-set-position field was written (0 until Write is called). */
	u_int32_t dataSetPos;
	/*! Stream position at which the next-data-group-position field was written (0 until Write is called). */
	u_int32_t nextDataGroupPos;

public:

	/*! Constructor. Resets both recorded field positions to 0. */
	DataGroupHeaderWriter();
	/*! Destructor */
	~DataGroupHeaderWriter() {}

	/*! Writes the header fields at the current stream position.
	 *  Both position fields are written as 0 placeholders.
	 * @param os The output stream.
	 * @param dc The data group header supplying the data set count and name.
	 */
	void Write(std::ofstream &os, const DataGroupHeader &dc);

	/*! Rewrites the data-set-position field and leaves the stream at pos.
	 *  Does nothing if the field has not been written yet.
	 * @param os The output stream.
	 * @param pos The file position to store.
	 */
	void UpdateDataSetPos(std::ofstream &os, u_int32_t pos) const;

	/*! Rewrites the next-data-group-position field and leaves the stream at pos.
	 *  Does nothing if the field has not been written yet.
	 * @param os The output stream.
	 * @param pos The file position to store.
	 */
	void UpdateNextDataGroupPos(std::ofstream &os, u_int32_t pos) const;

private:

	/*! Records the current stream position in dataSetPos, then writes pos. */
	void WriteDataSetPos(std::ofstream &os, u_int32_t pos);
	/*! Records the current stream position in nextDataGroupPos, then writes pos. */
	void WriteNextDataGroupPos(std::ofstream &os, u_int32_t pos);
	/*! Writes the data group name. */
	void WriteName(std::ofstream &os, const DataGroupHeader &dc) const;
	/*! Writes the data set count. */
	void WriteDataSetCnt(std::ofstream &os, const DataGroupHeader &dc) const;
};

}

#endif // _DataGroupHeaderWriter_HEADER_
affxparser/src/fusion/calvin_files/writers/src/DataGroupWriter.cpp0000644000175200017520000000500314516003651026541 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License 
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataGroupWriter.h" // #include "calvin_files/writers/src/DataGroupHeaderWriter.h" #include "calvin_files/writers/src/FileWriteException.h" // using namespace affymetrix_calvin_io; DataGroupWriter::DataGroupWriter(std::ofstream* o, DataGroupHeader* p) { os = o; dataGroupHdr = p; CreateWriters(); } DataGroupWriter::~DataGroupWriter() { writers.clear(); } std::wstring DataGroupWriter::GetName() { return dataGroupHdr->GetName(); } void DataGroupWriter::WriteHeader() { dataGroupHdrWriter.Write(*os, *dataGroupHdr); u_int32_t currentPos = os->tellp(); dataGroupHdrWriter.UpdateDataSetPos(*os, currentPos); } void DataGroupWriter::Close() const { u_int32_t currentPos = os->tellp(); dataGroupHdrWriter.UpdateNextDataGroupPos(*os, currentPos); } void DataGroupWriter::GetDataSetWriters(DataSetWriterIt &begin, DataSetWriterIt &end) { begin = writers.begin(); end = writers.end(); } DataSetWriter& DataGroupWriter::GetDataSetWriter(int32_t index) { return writers[index]; } int32_t DataGroupWriter::GetDataSetWriterCnt() const { return (int32_t)writers.size(); } void DataGroupWriter::CreateWriters() { int sz = dataGroupHdr->GetDataSetCnt(); for(int i = 0; i < sz; i++) { DataSetWriter* p = CreateDataSetWriter(dataGroupHdr->GetDataSet(i)); writers.push_back(*p); delete p; } } DataSetWriter* DataGroupWriter::CreateDataSetWriter(DataSetHeader& hdr) { return new DataSetWriter(os, &hdr); } std::wstring DataGroupWriter::GetDataGroupName() const { return dataGroupHdr->GetName(); } void DataGroupWriter::UpdateNextDataGroupPos() const { u_int32_t currentPos = os->tellp(); dataGroupHdrWriter.UpdateNextDataGroupPos(*os, currentPos); } 
affxparser/src/fusion/calvin_files/writers/src/DataGroupWriter.h0000644000175200017520000000415314516003651026213 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataGroupWriter_HEADER_ #define _DataGroupWriter_HEADER_ #include "calvin_files/data/src/DataGroupHeader.h" #include "calvin_files/writers/src/DataGroupHeaderWriter.h" #include "calvin_files/writers/src/DataSetWriter.h" // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { class DataGroupWriter { private: std::ofstream* os; DataGroupHeader* dataGroupHdr; DataGroupHeaderWriter dataGroupHdrWriter; DataSetWriterVector writers; public: DataGroupWriter(std::ofstream* o, DataGroupHeader* p); ~DataGroupWriter(); std::wstring GetName(); void WriteHeader(); void Close() const; void GetDataSetWriters(DataSetWriterIt &begin, DataSetWriterIt &end); DataSetWriter& GetDataSetWriter(int32_t index); int32_t GetDataSetWriterCnt() const; std::wstring GetDataGroupName() const; DataSetWriter* CreateDataSetWriter(DataSetHeader& hdr); void UpdateNextDataGroupPos() const; private: void CreateWriters(); }; /*! vector of DataGroupHeaderWriters */ typedef std::vector DataGroupWriterVector; /*! constant iterator of DataGroupHeaderWriters */ typedef std::vector::iterator DataGroupWriterIt; } #endif // _DataGroupWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/DataSetHeaderAppend.cpp0000644000175200017520000001317614516003651027256 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataSetHeaderAppend.h" // #include "calvin_files/parsers/src/GenericFileReader.h" #include "calvin_files/utils/src/StringUtils.h" #include "calvin_files/writers/src/GenericFileUpdater.h" // #ifdef _HAS_APT_VERBOSE_ #include "util/Err.h" #include "util/Verbose.h" #endif using namespace std; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; using namespace affymetrix_calvin_parameter; #ifdef _HAS_APT_VERBOSE_ #define VERBOSE(level, msg) { Verbose::out(level, msg); } #define ERR(msg) { Err::errAbort(msg); } #else #define VERBOSE(level, msg) { } #define ERR(msg) { throw msg; } #endif static void AddDataSetHeaderToGroup(GenericFileWriter *gwf, int groupIndex, int nsets, FileHeader &fileHdr, const wstring &setName, const ParameterNameValueTypeList ¶ms) { DataSetHeader hdr; hdr.SetName(setName); hdr.SetRowCnt(0); for (ParameterNameValueTypeList::const_iterator it=params.begin(); it!=params.end(); it++) { hdr.AddNameValParam(*it); } // Write the file DataSetWriter* writer = NULL; try { int groupPos = fileHdr.GetFirstDataGroupFilePos(); if (groupIndex > 0) groupPos = fileHdr.GetDataGroup(groupIndex-1).GetNextGroupPos(); if (nsets == 0) { gwf->SeekFromEndPos(0); int setPos = gwf->GetFilePos(); gwf->SeekFromBeginPos(groupPos + 4); gwf->Write(setPos); } gwf->SeekFromBeginPos(groupPos + 8); gwf->Write(nsets + 1); writer = gwf->GetDataGroupWriter(groupIndex).CreateDataSetWriter(hdr); gwf->SeekFromEndPos(0); writer->WriteHeader(); writer->UpdateNextDataSetOffset(); delete writer; } catch (...) 
{ delete writer; writer = NULL; ERR("Failed to update the file"); } } void DataSetHeaderAppend::AddDataSetHeader(const string &file, const wstring &groupName, const list ¶ms) { GenericData gData; GenericFileReader reader; reader.SetFilename(file); reader.ReadHeader(gData); gData.Close(); try { FileHeader &fileHdr = gData.Header(); int32_t ng = fileHdr.GetDataGroupCnt(); bool groupFound = false; int groupIndex = 0; for (int ig=0; ig::const_iterator it=params.begin(); it!=params.end(); it++) { //bool lastSet = (is == (ns-1) && ig == (ng-1)); //if (params.size() > 1) // lastSet = false; if (dataSetHdr.GetName() == it->dataSetName/* && lastSet == false*/) { u_int32_t offset = dataSetHdr.GetHeaderStartFilePos(); GenericFileWriter* writer = new GenericFileWriter(&fileHdr, false); writer->SeekFromBeginPos(offset); wstring name = dataSetHdr.GetName(); for (int i=0; i<(int)name.length(); i++) name[i] = '-'; dataSetHdr.SetName(name); DataSetWriter& setWriter = writer->GetDataGroupWriter(ig).GetDataSetWriter(is); setWriter.WriteHeader(); setWriter.UpdateNextDataSetOffset(); delete writer; break; } } } } if (groupFound == false) { groupIndex = ng; DataGroupHeader dgh; dgh.SetName(groupName); dgh.SetNextGroupPos(0); dgh.SetDataSetPos(0); fileHdr.AddDataGroupHdr(dgh); GenericFileWriter* writer = new GenericFileWriter(&fileHdr, false); writer->WriteHeader(); for (int ig=0; igGetFilePos(); writer->SeekFromEndPos(0); int endPos = writer->GetFilePos(); currentHdr.SetNextGroupPos(endPos); writer->SeekFromBeginPos(currentPos); DataGroupWriter &groupWriter = writer->GetDataGroupWriter(ig); groupWriter.WriteHeader(); writer->SeekFromBeginPos(currentHdr.GetNextGroupPos()); groupWriter.UpdateNextDataGroupPos(); } writer->SeekFromBeginPos(currentHdr.GetNextGroupPos()); } DataGroupWriter &groupWriter = writer->GetDataGroupWriter(ng); groupWriter.WriteHeader(); writer->SeekFromBeginPos(0); groupWriter.UpdateNextDataGroupPos(); delete writer; } VERBOSE(1, "Merging data into the file"); 
GenericFileWriter *gwf = new GenericFileWriter(&fileHdr, false); DataGroupHeader &dgh = fileHdr.GetDataGroup(groupIndex); int nsets = dgh.GetDataSetCnt(); for (list::const_iterator it=params.begin(); it!=params.end(); it++) { AddDataSetHeaderToGroup(gwf, groupIndex, nsets, fileHdr, it->dataSetName, it->params); ++nsets; } delete gwf; } catch (...) { ERR("Error updating the file"); } } affxparser/src/fusion/calvin_files/writers/src/DataSetHeaderAppend.h0000644000175200017520000000424214516003651026715 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataSetHeaderAppend_HEADER_ #define _DataSetHeaderAppend_HEADER_ #include "calvin_files/parameter/src/ParameterNameValueType.h" // #include #include // namespace affymetrix_calvin_io { /*! Information for a data set. */ typedef struct _DataSetHeaderInformation { /*! The parameters for the data set */ affymetrix_calvin_parameter::ParameterNameValueTypeList params; /*! The data set name */ std::wstring dataSetName; } DataSetHeaderInformation; /*! A class to provide functions to add a data set to an existing file. */ class DataSetHeaderAppend { private: /*! 
Add a data set header to the file * @param file The name of the file. * @param params The parameters for the data set. * @param groupName The name of the data group. * @param setName The name of the data set. */ //void AddDataSetHeader(const std::string &file, const affymetrix_calvin_parameter::ParameterNameValueTypeList ¶ms, const std::wstring &groupName, const std::wstring &setName); public: /*! Add a data set header to the file * @param file The name of the file. * @param groupName The name of the group for the data sets. * @param params A list of data set name/parameters. */ void AddDataSetHeader(const std::string &file, const std::wstring &groupName, const std::list ¶ms); }; } #endif affxparser/src/fusion/calvin_files/writers/src/DataSetHeaderUpdater.cpp0000644000175200017520000000570014516003651027445 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataSetHeaderUpdater.h" // #include "calvin_files/writers/src/FileOutput.h" // using namespace std; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; #define WCHAR_T_SIZE 2 /* * Constructor */ DataSetHeaderUpdater::DataSetHeaderUpdater(std::ofstream& fileStream, DataSetHeader &hdr) { os = &fileStream; setHdr = &hdr; filePos = hdr.GetHeaderStartFilePos(); filePos += sizeof(u_int32_t); // data position filePos += sizeof(u_int32_t); // next data set position filePos += sizeof(int32_t); // name length filePos += (u_int32_t)(WCHAR_T_SIZE*hdr.GetName().length()); // data set name filePos += sizeof(u_int32_t); // #parameters } /* * Destructor */ DataSetHeaderUpdater::~DataSetHeaderUpdater() { } /* * Update the parameter list. 
*/ bool DataSetHeaderUpdater::UpdateParameter(ParameterNameValueType &nvt) { u_int32_t pos = filePos; ParameterNameValueType param; ParameterNameValueTypeConstIt b; ParameterNameValueTypeConstIt e; setHdr->GetNameValIterators(b, e); while (b != e) { param = *b; pos += sizeof(int32_t) + (int32_t)param.GetName().length()*WCHAR_T_SIZE; // Check that the types match and the values are the same size MIMEValue paramMIME = param.GetMIMEValue(); MIMEValue nvtMINE = nvt.GetMIMEValue(); if (nvt.GetName() == param.GetName() && nvt.GetParameterType() == param.GetParameterType() && nvtMINE.Size() == paramMIME.Size()) { os->seekp(pos, std::ios::beg); u_int32_t sz; const void* ptr = nvtMINE.GetValue(sz); FileOutput::WriteBlob(*os, ptr, sz); return true; } pos += sizeof(int32_t) + paramMIME.Size(); pos += sizeof(int32_t) + (int32_t)param.GetMIMEType().size()*WCHAR_T_SIZE; ++b; } return false; } void DataSetHeaderUpdater::SeekNextDataSetPosition() { u_int32_t pos = setHdr->GetHeaderStartFilePos(); pos += sizeof(u_int32_t); // data position os->seekp(pos, std::ios::beg); } void DataSetHeaderUpdater::UpdateNextDataSetPosition(u_int32_t position) { u_int32_t pos = os->tellp(); SeekNextDataSetPosition(); FileOutput::WriteUInt32(*os, position); os->seekp(pos, std::ios::beg); } affxparser/src/fusion/calvin_files/writers/src/DataSetHeaderUpdater.h0000644000175200017520000000407614516003651027117 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataSetHeaderUpdater_HEADER_ #define _DataSetHeaderUpdater_HEADER_ /*! \file DataSetHeaderUpdater.h This file defines a class that updates the data set header of an existing file. */ #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/parameter/src/ParameterNameValueType.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class updates the data set header of an existing file with new information. */ class DataSetHeaderUpdater { public: /*! Constructor */ DataSetHeaderUpdater(std::ofstream& fileStream, DataSetHeader &hdr); /*! Destructor */ ~DataSetHeaderUpdater(); /*! Update the parameter list. * Can only update types that have the same name and type and value size in the source and target */ bool UpdateParameter(affymetrix_calvin_parameter::ParameterNameValueType &nvt); void UpdateNextDataSetPosition(u_int32_t position); private: /*! Open output filestream */ std::ofstream* os; /*! The data set header */ DataSetHeader* setHdr; u_int32_t filePos; void SeekNextDataSetPosition(); }; } #endif // _DataSetHeaderUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/DataSetHeaderWriter.cpp0000644000175200017520000000656414516003651027326 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataSetHeaderWriter.h" // #include "calvin_files/writers/src/FileOutput.h" // using namespace affymetrix_calvin_io; void DataSetHeaderWriter::Write(std::ofstream &os, DataSetHeader &dc) { WriteDataOffset(os, 0); WriteNextDataSetOffset(os, 0); WriteName(os, dc); WriteNameValCnt(os, dc); WriteNameValParams(os, dc); WriteColumnCnt(os, dc); WriteColumnTypes(os, dc); WriteRowCnt(os, dc); } void DataSetHeaderWriter::WriteDataOffset(std::ofstream &os, u_int32_t pos) { dataPos = os.tellp(); FileOutput::WriteInt32(os, pos); } void DataSetHeaderWriter::UpdateDataOffset(std::ofstream &os, u_int32_t pos) const { if(dataPos > 0) { os.seekp(dataPos, std::ios::beg); FileOutput::WriteUInt32(os, pos); os.seekp(pos, std::ios::beg); } } void DataSetHeaderWriter::WriteNextDataSetOffset(std::ofstream &os, u_int32_t pos) { nextDataSetPos = os.tellp(); FileOutput::WriteInt32(os, pos); } void DataSetHeaderWriter::UpdateNextDataSetOffset(std::ofstream &os, u_int32_t pos) const { if(nextDataSetPos > 0) { os.seekp(nextDataSetPos, std::ios::beg); FileOutput::WriteUInt32(os, pos); os.seekp(pos, std::ios::beg); } } void DataSetHeaderWriter::WriteName(std::ofstream &os, const DataSetHeader &dc) const { FileOutput::WriteString16(os, dc.GetName()); } void DataSetHeaderWriter::WriteNameValCnt(std::ofstream &os, const DataSetHeader &dc) const { FileOutput::WriteInt32(os, dc.GetNameValParamCnt()); } void 
DataSetHeaderWriter::WriteNameValParams(std::ofstream &os, DataSetHeader &dc) { ParameterNameValueTypeConstIt begin; ParameterNameValueTypeConstIt end; dc.GetNameValIterators(begin, end); while(begin != end) { FileOutput::WriteString16(os, begin->GetName()); MIMEValue mv = begin->GetMIMEValue(); u_int32_t sz; const void* ptr = mv.GetValue(sz); FileOutput::WriteBlob(os, ptr, sz); FileOutput::WriteString16(os, begin->GetMIMEType()); begin++; } } void DataSetHeaderWriter::WriteColumnCnt(std::ofstream &os, const DataSetHeader &dc) const { FileOutput::WriteInt32(os, dc.GetColumnCnt()); } void DataSetHeaderWriter::WriteColumnTypes(std::ofstream &os, const DataSetHeader &dc) const { int32_t sz = dc.GetColumnCnt(); // Write the types for(int i = 0; i < sz; i++) { ColumnInfo col = dc.GetColumnInfo(i); FileOutput::WriteString16(os, col.GetName()); FileOutput::WriteInt8(os, col.GetColumnType()); FileOutput::WriteInt32(os, col.GetSize()); } } void DataSetHeaderWriter::WriteRowCnt(std::ofstream &os, const DataSetHeader &dc) const { FileOutput::WriteInt32(os, dc.GetRowCnt()); } affxparser/src/fusion/calvin_files/writers/src/DataSetHeaderWriter.h0000644000175200017520000000413114516003651026757 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataSetHeaderWriter_HEADER_ #define _DataSetHeaderWriter_HEADER_ #include "calvin_files/data/src/DataSetHeader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { class DataSetHeaderWriter { private: u_int32_t dataPos; u_int32_t nextDataSetPos; public: DataSetHeaderWriter() { dataPos = 0; nextDataSetPos = 0; } ~DataSetHeaderWriter() {} void Write(std::ofstream &os, DataSetHeader &dc); void UpdateDataOffset(std::ofstream &os, u_int32_t pos) const; void UpdateNextDataSetOffset(std::ofstream &os, u_int32_t pos) const; private: void WriteDataOffset(std::ofstream &os, u_int32_t pos); void WriteNextDataSetOffset(std::ofstream &os, u_int32_t pos); void WriteName(std::ofstream &os, const DataSetHeader &dc) const; void WriteNameValCnt(std::ofstream &os, const DataSetHeader &dc) const; void WriteNameValParams(std::ofstream &os, DataSetHeader &dc); void WriteColumnTypes(std::ofstream &os, const DataSetHeader &dc) const; void WriteColumnCnt(std::ofstream &os, const DataSetHeader &dc) const; void WriteRowCnt(std::ofstream &os, const DataSetHeader &dc) const; }; }; #endif // _DataSetHeaderWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/DataSetUpdater.cpp0000644000175200017520000001257414516003651026343 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataSetUpdater.h" // #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parsers/src/GenericFileReader.h" #include "calvin_files/writers/src/FileOutput.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_exceptions; using namespace std; /* * Initialize any needed members. */ DataSetUpdater::DataSetUpdater() { } /* * Clean up. */ DataSetUpdater::~DataSetUpdater() { } /* * Read the data file and cache the data headers. */ void DataSetUpdater::Initialize(const char *file) { fileName = file; GenericData data; GenericFileReader reader; reader.SetFilename(fileName); reader.Open(data); int ng = data.Header().GetDataGroupCnt(); positions.resize(ng); rowsizes.resize(ng); colsizes.resize(ng); dataSetNames.resize(ng); for (int ig=0; ig #include #include #include // namespace affymetrix_calvin_io { /*! Provides interfaces to update data in a "Calvin" binary data file. * This class assumes that the file has been completely written and that * only existing data groups/sets/rows/cols are being modified. */ class DataSetUpdater { protected: /*! The name of the file being updated. */ std::string fileName; /*! The position of a data set for all data sets in the file. */ std::vector > positions; /*! The size of a row for all data sets in the file. */ std::vector > rowsizes; /*! The size of a column for all data sets in the file. */ std::vector > > colsizes; /*! 
The data set names. */ std::vector > dataSetNames; /*! Seek to the position of the data. * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. */ void SeekToPosition(std::ofstream &os, int groupIndex, int setIndex, int row, int col); public: /*! Constructor */ DataSetUpdater(); /*! Destructor */ ~DataSetUpdater(); /*! Initialize the class given a "calvin" data file. * @param file The name of the file. * @exception affymetrix_calvin_exceptions::FileNotFoundException The file does not exist. * @exception affymetrix_calvin_exceptions::InvalidVersionException The file version does not match. * @exception affymetrix_calvin_exceptions::InvalidFileTypeException The file is not of the right type. */ virtual void Initialize(const char *file); /*! Updates a string value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, const std::string &value); /*! Updates a wstring value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, const std::wstring &value); /*! Updates an integer value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, int8_t value); /*! Updates an integer value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. 
* @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, int16_t value); /*! Updates an integer value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, int32_t value); /*! Updates an integer value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, u_int8_t value); /*! Updates an integer value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, u_int16_t value); /*! Updates an integer value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. */ void Update(int groupIndex, int setIndex, int row, int col, u_int32_t value); /*! Updates a floating point value in the data set * @param groupIndex The index to the data groups. * @param setIndex The index to the data sets. * @param row The index to the rows. * @param col The index to the columns. * @param value The new value. 
*/ void Update(int groupIndex, int setIndex, int row, int col, float value); }; } #endif // _DataSetUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/DataSetWriter.cpp0000644000175200017520000000514614516003651026210 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/DataSetWriter.h" // using namespace affymetrix_calvin_io; DataSetWriter::DataSetWriter(std::ofstream* s, DataSetHeader* d) { os = s; dataSetHdr = d; columnCnt = dataSetHdr->GetColumnCnt(); columnIdx = 0; rowIdx = 0; rowCnt = dataSetHdr->GetRowCnt(); } void DataSetWriter::WriteHeader() { hdrWriter.Write(*os, *dataSetHdr); u_int32_t currentPos = os->tellp(); dataSetHdr->SetDataStartFilePos(currentPos); hdrWriter.UpdateDataOffset(*os, currentPos); } void DataSetWriter::UpdateNextDataSetOffset() const { u_int32_t currentPos = os->tellp(); UpdateNextDataSetOffset(currentPos); } void DataSetWriter::UpdateNextDataSetOffset(u_int32_t pos) const { dataSetHdr->SetNextSetFilePos(pos); hdrWriter.UpdateNextDataSetOffset(*os, pos); } std::wstring DataSetWriter::GetDataSetName() const { return dataSetHdr->GetName(); } int32_t DataSetWriter::GetDataSetSize() const 
{ return dataSetHdr->GetDataSize(); } void DataSetWriter::Write(int8_t p) { FileOutput::WriteInt8(*os, p); } void DataSetWriter::Write(u_int8_t p) { FileOutput::WriteUInt8(*os, p); } void DataSetWriter::Write(int16_t p) { FileOutput::WriteInt16(*os, p); } void DataSetWriter::Write(u_int16_t p) { FileOutput::WriteUInt16(*os, p); } void DataSetWriter::Write(int32_t p) { FileOutput::WriteInt32(*os, p); } void DataSetWriter::Write(u_int32_t p) { FileOutput::WriteUInt32(*os, p); } void DataSetWriter::Write(float p) { FileOutput::WriteFloat(*os, p); } void DataSetWriter::Write(const std::string &p, int32_t maxLn) { FileOutput::WriteString8(*os, p, maxLn); } void DataSetWriter::Write(const std::wstring &p, int32_t maxLn) { FileOutput::WriteString16(*os, p, maxLn); } void DataSetWriter::WriteBuffer(char* psBuffer, int32_t iLength) { os->write(psBuffer, iLength); } affxparser/src/fusion/calvin_files/writers/src/DataSetWriter.h0000644000175200017520000000441314516003651025651 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DataSetWriter_HEADER_ #define _DataSetWriter_HEADER_ #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/writers/src/DataSetHeaderWriter.h" #include "calvin_files/writers/src/FileOutput.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { class DataSetWriter { public: DataSetWriter(std::ofstream* s, DataSetHeader* d); ~DataSetWriter() { os->flush(); }; private: DataSetHeader* dataSetHdr; DataSetHeaderWriter hdrWriter; std::ofstream* os; int32_t columnCnt; int32_t columnIdx; int32_t rowIdx; int32_t rowCnt; public: void WriteHeader(); void UpdateNextDataSetOffset() const; void UpdateNextDataSetOffset(u_int32_t pos) const; void Write(int8_t p); void Write(u_int8_t p); void Write(int16_t p); void Write(u_int16_t p); void Write(int32_t p); void Write(u_int32_t p); void Write(float p); void Write(const std::string &p, int32_t maxLn); void Write(const std::wstring &p, int32_t maxLn); void WriteBuffer(char* psBuffer, int32_t iLength); std::wstring GetDataSetName() const; int32_t GetDataSetSize() const; private: }; /*! vector of DataSetHeaders */ typedef std::vector DataSetWriterVector; /*! constant iterator of DataSetHeaders */ typedef std::vector::iterator DataSetWriterIt; } #endif // _DataSetWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/FileHeaderUpdater.cpp0000644000175200017520000001041714516003651027000 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/FileHeaderUpdater.h" // #include "calvin_files/writers/src/GenericDataHeaderUpdater.h" // using namespace affymetrix_calvin_io; /* * Constructor */ FileHeaderUpdater::FileHeaderUpdater() { os = 0; fileHdr = 0; } FileHeaderUpdater::FileHeaderUpdater(std::ofstream& ostrm, FileHeader& hdr) { os = &ostrm; fileHdr = &hdr; } /* * Destructor */ FileHeaderUpdater::~FileHeaderUpdater() { } /* * Update the file with new FileHeader information */ bool FileHeaderUpdater::Update(std::ofstream& ostrm, FileHeader& updateHdr, FileHeader& currentHdr) { // There is nothing at this time to update in the FileHeader // Move on to the GenericDataHeader GenericDataHeaderUpdater genUpdater; return genUpdater.Update(ostrm, *updateHdr.GetGenericDataHdr(), *currentHdr.GetGenericDataHdr()); } //void FileHeaderUpdater::UpdateVersion(int8_t version) //{ // u_int32_t pos = os->tellp(); // SeekVersionPosition(); // FileOutput::WriteInt8(*os, version); // os->seekp(pos, std::ios::beg); //} // //void FileHeaderUpdater::UpdateMagicNumber(int8_t magic) //{ // u_int32_t pos = os->tellp(); // os->seekp(0, std::ios::beg); // FileOutput::WriteInt8(*os, magic); // os->seekp(pos, std::ios::beg); //} void 
FileHeaderUpdater::UpdateDataGroupCount(int32_t count) { u_int32_t pos = os->tellp(); SeekDataGrpCntPosition(); FileOutput::WriteUInt32(*os, count); os->seekp(pos, std::ios::beg); } void FileHeaderUpdater::SeekVersionPosition() { os->seekp(0, std::ios::beg); int32_t pos = os->tellp(); pos += sizeof(int8_t); // magic number position os->seekp(pos, std::ios::beg); } void FileHeaderUpdater::SeekDataGrpCntPosition() { os->seekp(0, std::ios::beg); int32_t pos = os->tellp(); pos += sizeof(int8_t); // magic number position pos += sizeof(int8_t); // version position os->seekp(pos, std::ios::beg); } void FileHeaderUpdater::SeekLeadDataGrpPosition() { os->seekp(0, std::ios::beg); int32_t pos = os->tellp(); pos += sizeof(int8_t); // magic number position pos += sizeof(int8_t); // version position pos += sizeof(u_int32_t); // data group count position os->seekp(pos, std::ios::beg); } void FileHeaderUpdater::UpdateLeadDataGroupOffset(u_int32_t offset) { u_int32_t pos = os->tellp(); SeekLeadDataGrpPosition(); FileOutput::WriteUInt32(*os, offset); os->seekp(pos, std::ios::beg); } void FileHeaderUpdater::AppendDataGroupHeader(DataGroupHeader& grpHdr) { os->seekp(0, std::ios::end); int sz = fileHdr->GetDataGroupCnt(); u_int32_t grpPos = os->tellp(); grpHdr.SetHeaderStartFilePos(grpPos); if(sz > 0) { //update previous data group with new position DataGroupHeader lastHdr = fileHdr->GetDataGroup(sz - 1); DataGroupHeaderUpdater lastGrpUpdater(*os, lastHdr); lastGrpUpdater.UpdateNextDataGroupOffset(grpPos); } else { //this will be the first data group UpdateLeadDataGroupOffset(grpPos); } //increment the number of data sets in the group UpdateDataGroupCount(sz + 1); DataGroupWriter grpWriter(os, &grpHdr); grpWriter.WriteHeader(); grpHdr.SetDataSetPos(os->tellp()); DataSetWriterIt begin, end; grpWriter.GetDataSetWriters(begin, end); while(begin != end) { begin->WriteHeader(); os->seekp(begin->GetDataSetSize(), std::ios::cur); begin->UpdateNextDataSetOffset(); begin++; } //keep the next 
offset at zero since this will be the last data group and it complies with the file spec //grpWriter.UpdateNextDataGroupPos(); //grpHdr.SetNextGroupPos(os->tellp()); fileHdr->AddDataGroupHdr(grpHdr); } affxparser/src/fusion/calvin_files/writers/src/FileHeaderUpdater.h0000644000175200017520000000466314516003651026453 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FileHeaderUpdater_HEADER_ #define _FileHeaderUpdater_HEADER_ /*! \file FileHeaderUpdater.h This file defines a class that updates the FileHeader of an existing file. */ #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/writers/src/DataGroupHeaderUpdater.h" #include "calvin_files/writers/src/DataGroupWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class updates the FileHeader of an existing file with new information. */ class FileHeaderUpdater { public: /*! Constructor */ FileHeaderUpdater(); /*! Constructor */ FileHeaderUpdater(std::ofstream& ostrm, FileHeader& fileHdr); /*! Destructor */ ~FileHeaderUpdater(); /*! 
Update the file with new FileHeader information * @param os An output stream open with std::ios::out|std::ios::binary|std::ios::in. * @param updateHdr A FileHeader with information to update the file. * @param currentHdr A FileHeader with information current in the file. * @return Returns true if successful. */ bool Update(std::ofstream& ostrm, FileHeader& updateHdr, FileHeader& currentHdr); void AppendDataGroupHeader(DataGroupHeader& grpHdr); void UpdateDataGroupCount(int32_t count); void UpdateLeadDataGroupOffset(u_int32_t pos); private: /*! Open output filestream */ std::ofstream* os; /*! The data set header */ FileHeader* fileHdr; void SeekDataGrpCntPosition(); void SeekLeadDataGrpPosition(); void SeekVersionPosition(); }; } #endif // _FileHeaderUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/FileHeaderWriter.cpp0000644000175200017520000000475614516003651026661 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/FileHeaderWriter.h" // #include "calvin_files/utils/src/AffymetrixGuid.h" #include "calvin_files/writers/src/FileOutput.h" #include "calvin_files/writers/src/GenericDataHeaderWriter.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; void FileHeaderWriter::Write(std::ofstream &os, FileHeader &g) { WriteMagicNumber(os, g); WriteVersion(os, g); WriteDataGroupCnt(os, g); WriteDataGroupOffset(os, 0); WriteGenericDataHdr(os, g); } void FileHeaderWriter::WriteMagicNumber(std::ofstream &os, FileHeader &g) { FileOutput::WriteInt8(os, g.GetMagicNumber()); } void FileHeaderWriter::WriteVersion(std::ofstream &os, FileHeader &g) { FileOutput::WriteInt8(os, g.GetVersion()); } void FileHeaderWriter::WriteDataGroupCnt(std::ofstream &os, FileHeader &g) { FileOutput::WriteInt32(os, g.GetDataGroupCnt()); } void FileHeaderWriter::WriteDataGroupOffset(std::ofstream &os, u_int32_t offset) { dataSetOffsetPos = os.tellp(); FileOutput::WriteUInt32(os, offset); } void FileHeaderWriter::UpdateDataGroupOffset(std::ofstream &os, u_int32_t offset) const { if(dataSetOffsetPos > 0) { os.seekp(dataSetOffsetPos, std::ios::beg); FileOutput::WriteUInt32(os, offset); os.seekp(offset, std::ios::beg); } } void FileHeaderWriter::WriteGenericDataHdr(std::ofstream &os, FileHeader &g) { GenericDataHeaderWriter writer; // Check if a file ID has been assign, if not assign one. 
if (g.GetGenericDataHdr()->GetFileId().length() == 0) g.GetGenericDataHdr()->SetFileId(AffymetrixGuid::GenerateNewGuid()); writer.Write(os, *(g.GetGenericDataHdr())); } affxparser/src/fusion/calvin_files/writers/src/FileHeaderWriter.h0000644000175200017520000000335314516003651026316 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FileHeaderWriter_HEADER_ #define _FileHeaderWriter_HEADER_ #include "calvin_files/data/src/FileHeader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { class FileHeaderWriter { private: u_int32_t dataSetOffsetPos; public: FileHeaderWriter() { dataSetOffsetPos = 0; } ~FileHeaderWriter() {} void Write(std::ofstream &os, FileHeader &p); void UpdateDataGroupOffset(std::ofstream &os, u_int32_t offset) const; private: void WriteMagicNumber(std::ofstream &os, FileHeader &p); void WriteVersion(std::ofstream &os, FileHeader &p); void WriteDataGroupCnt(std::ofstream &os, FileHeader &p); void WriteDataGroupOffset(std::ofstream &os, u_int32_t offset); void WriteGenericDataHdr(std::ofstream &os, FileHeader &f); }; } #endif // _FileHeaderWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/FileOutput.cpp0000644000175200017520000001247714516003651025573 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _MSC_VER #include "windows.h" #endif // #include "calvin_files/writers/src/FileOutput.h" // #include #include #include #include #include // #ifndef _MSC_VER #include #include #include #endif using namespace affymetrix_calvin_io; /* * Write an 8 bit number to the file. 
*/ void FileOutput::WriteInt8(std::ofstream &outstr, int8_t value) { outstr.write((char *)&value, sizeof(value)); } /* * Write a 16 bit number to the file. */ void FileOutput::WriteInt16(std::ofstream &outstr, int16_t value) { value = htons(value); outstr.write((char *)&value, sizeof(value)); } /* * Write a 32 bit number to the file. */ void FileOutput::WriteInt32(std::ofstream &outstr, int32_t value) { value = htonl(value); outstr.write((char *)&value, sizeof(value)); } /* * Write an 8 bit unsigned number to the file. */ void FileOutput::WriteUInt8(std::ofstream &outstr, u_int8_t value) { outstr.write((char *)&value, sizeof(value)); } /* * Write a 16 bit unsigned number to the file. */ void FileOutput::WriteUInt16(std::ofstream &outstr, u_int16_t value) { value = htons(value); outstr.write((char *)&value, sizeof(value)); } /* * Write a 32 bit unsigned number to the file. */ void FileOutput::WriteUInt32(std::ofstream &outstr, u_int32_t value) { value = htonl(value); outstr.write((char *)&value, sizeof(value)); } /* * Write a 32 bit floating point value from a file. */ void FileOutput::WriteFloat(std::ofstream &outstr, float value) { type_punned pun; pun.v_float=value; WriteInt32(outstr,pun.v_int32); } /* * Write a string to the file stream. */ void FileOutput::WriteString8(std::ofstream &outstr, const std::string &value) { int32_t len = (int32_t) value.length(); FileOutput::WriteInt32(outstr, len); FileOutput::WriteString8(outstr, value.c_str(), len); } /* * Write a string to the file stream. Pad with nulls. */ void FileOutput::WriteString8(std::ofstream &outstr, const char *value, int32_t len) { // how much padding will we need? int pad_len; int str_len; if (value==NULL) { pad_len=len; } else { str_len=strlen(value); if (len #include #include // namespace affymetrix_calvin_io { class FileOutput { public: /*! Writes an 8 bit integer from a big endian file. * * @param outstr The output file stream. * @param value The value to write to the file. 
*/
  static void WriteInt8(std::ofstream &outstr, int8_t value);

  /*! Writes a 16 bit integer to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteInt16(std::ofstream &outstr, int16_t value);

  /*! Writes a 32 bit integer to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteInt32(std::ofstream &outstr, int32_t value);

  /*! Writes an 8 bit unsigned integer to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteUInt8(std::ofstream &outstr, u_int8_t value);

  /*! Writes a 16 bit unsigned integer to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteUInt16(std::ofstream &outstr, u_int16_t value);

  /*! Writes a 32 bit unsigned integer to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteUInt32(std::ofstream &outstr, u_int32_t value);

  /*! Writes a 32 bit floating point number to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteFloat(std::ofstream &outstr, float value);

  /*! Writes a 16 bit unicode string of fixed size to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   * @param len The length of the string.
   */
  static void WriteString16(std::ofstream &outstr, const wchar_t *value, int32_t len);

  /*! Writes a 16 bit unicode string to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteString16(std::ofstream &outstr, const std::wstring &value);

  /*! Writes an 8 bit string of fixed size to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   * @param len The length of the string.
   */
  static void WriteString8(std::ofstream &outstr, const char *value, int32_t len);

  /*! Writes an 8 bit string to a big endian file.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   */
  static void WriteString8(std::ofstream &outstr, const std::string &value);

  /*! Writes an 8 bit string to a big endian file with a fixed maximum size.
   * If the string length is less than maxLn then nulls are appended.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   * @param maxLn Maximum length of the string.
   */
  static void WriteString8(std::ofstream &outstr, const std::string &value, int32_t maxLn);

  /*! Writes a 16 bit string to a big endian file with a fixed maximum size.
   * If the string length is less than maxLn then nulls are appended.
   *
   * @param outstr The output file stream.
   * @param value The value to write to the file.
   * @param maxLn Maximum length of the string.
   */
  static void WriteString16(std::ofstream &outstr, const std::wstring &value, int32_t maxLn);

  /*! Writes a blob to a file (as is; no htonl). It is the responsibility of the caller to ensure
   * the blob is in a portable form.
   * @param outstr The output file stream.
   * @param value A pointer to the blob.
   * @param size The size of the blob.
   */
  static void WriteBlob(std::ofstream &outstr, const void* value, int32_t size);

  /*! Writes a blob to a file with reserved space (as is; no htonl). It is the responsibility of the caller to ensure
   * the blob is in a portable form.
   * @param outstr The output file stream.
   * @param value A pointer to the blob.
   * @param data_size The size of the blob.
   * @param reserved_size The size of the reserved space (total size including the blob size).
*/ static void WriteBlob(std::ofstream &outstr, const void* value, int32_t data_size, int32_t reserved_size); }; } #endif // _FileOutput_HEADER_ affxparser/src/fusion/calvin_files/writers/src/FileWriteException.cpp0000644000175200017520000000235214516003651027233 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/FileWriteException.h" // namespace affymetrix_calvin_exceptions { const std::wstring FileCreateException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::FileCreateException thrown."; } const std::wstring FileWriteException::ToString() { return SystemInfo() + L"affymetrix_calvin_exceptions::FileWriteException thrown."; } } affxparser/src/fusion/calvin_files/writers/src/FileWriteException.h0000644000175200017520000000367614516003651026712 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _FileWriteException_HEADER_ #define _FileWriteException_HEADER_ /*! \file FileWriteException.h This file provides class definition for the file writing exceptions. */ #include "calvin_files/exception/src/ExceptionBase.h" // namespace affymetrix_calvin_exceptions { class FileCreateException : public CalvinException { public: FileCreateException() : CalvinException() {} FileCreateException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; class FileWriteException : public CalvinException { public: FileWriteException() : CalvinException() {} FileWriteException(std::wstring _Source, std::wstring _Description, std::wstring _TimeStamp, std::string _FileName, u_int16_t _LineNumber, u_int64_t _ErrorCode): CalvinException(_Source, _Description, _TimeStamp, _FileName, _LineNumber, _ErrorCode) {} const std::wstring ToString(); }; }; #endif // _FileWriteException_HEADER_ affxparser/src/fusion/calvin_files/writers/src/GenericDataHeaderUpdater.cpp0000644000175200017520000001053714516003651030272 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/GenericDataHeaderUpdater.h" // #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/writers/src/FileHeaderUpdater.h" #include "calvin_files/writers/src/FileOutput.h" // using namespace std; using namespace affymetrix_calvin_io; #define WCHAR_T_SIZE 2 /* * Constructor */ GenericDataHeaderUpdater::GenericDataHeaderUpdater() { os = 0; updateHdr = 0; currentHdr = 0; } /* * Destructor */ GenericDataHeaderUpdater::~GenericDataHeaderUpdater() { } /* * Update the file with the new GenericDataHeader information. */ bool GenericDataHeaderUpdater::Update(std::ofstream& fileStream, GenericDataHeader& updateHeader, GenericDataHeader& currentHeader) { os = &fileStream; updateHdr = &updateHeader; currentHdr = ¤tHeader; bool result = UpdateFileId(); UpdateParameters(); return result; } /* * Update the FileId. Only update the FileId if update and current fileIDs are the same size. 
*/ bool GenericDataHeaderUpdater::UpdateFileId() { // Check if we can update the file Id; make sure the existing file Id is the same length as the new one if (currentHdr->GetFileId().length() == updateHdr->GetFileId().length() && updateHdr->GetFileId().length() != 0) { // Move the file stream position to the location of the file ID. int32_t offset = GetFileHeaderSize() + FIELD_LEN_SIZE + (int32_t)currentHdr->GetFileTypeId().length(); os->seekp(offset, std::ios::beg); // Write the new ID FileOutput::WriteString8(*os, updateHdr->GetFileId()); return true; } else return false; } /* * Update the parameter list. */ void GenericDataHeaderUpdater::UpdateParameters() { int32_t filePos = GetFileHeaderSize() + GetBytesFromGenericDataHdrStartToParameterList(); ParameterNameValueTypeIt begin, end; currentHdr->GetNameValIterators(begin, end); for (ParameterNameValueTypeIt ii = begin; ii != end; ++ii) { // Add name length to file offset filePos += FIELD_LEN_SIZE + (int32_t)ii->GetName().length()*WCHAR_T_SIZE; affymetrix_calvin_parameter::ParameterNameValueType nvt; if (updateHdr->FindNameValParam(ii->GetName(), nvt)) { // Check that the types match and the values are the same size if (ii->GetParameterType() == nvt.GetParameterType() && ii->GetMIMEValue().Size() >= nvt.GetMIMEValue().Size()) { // Safe to update - move to filePos os->seekp(filePos, std::ios::beg); // Write value MIMEValue mv = nvt.GetMIMEValue(); //existing value size u_int32_t size = ii->GetMIMEValue().Size(); //new value size u_int32_t sz; const void* ptr = mv.GetValue(sz); //write the value with reserved size FileOutput::WriteBlob(*os, ptr, sz, size); } } // Add Value length to offset filePos += FIELD_LEN_SIZE + ii->GetMIMEValue().Size(); // Add type length to offset filePos += FIELD_LEN_SIZE + (int32_t)ii->GetMIMEType().size()*WCHAR_T_SIZE; } } /* * Get the size of the FileHeader in bytes. 
*/ int32_t GenericDataHeaderUpdater::GetFileHeaderSize() { return 2*sizeof(u_int8_t) + sizeof(u_int32_t) + sizeof(int32_t); } /* * Get the size of the GenericDataHeader up to the start of the parameter list. */ int32_t GenericDataHeaderUpdater::GetBytesFromGenericDataHdrStartToParameterList() { return FIELD_LEN_SIZE + (int32_t)currentHdr->GetFileTypeId().length() + FIELD_LEN_SIZE + (int32_t)currentHdr->GetFileId().length() + FIELD_LEN_SIZE + (int32_t)currentHdr->GetFileCreationTime().length()*WCHAR_T_SIZE + FIELD_LEN_SIZE + (int32_t)currentHdr->GetLocale().length()*WCHAR_T_SIZE + FIELD_LEN_SIZE; } affxparser/src/fusion/calvin_files/writers/src/GenericDataHeaderUpdater.h0000644000175200017520000000607114516003651027735 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericDataHeaderUpdater_HEADER_ #define _GenericDataHeaderUpdater_HEADER_ /*! \file GenericDataHeaderUpdater.h This file defines a class that updates the GenericDataHdr of an existing file. */ #include "calvin_files/data/src/FileHeader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { /*! This class updates the GenericDataHeader of an existing file with new information. */ class GenericDataHeaderUpdater { public: static const int32_t FIELD_LEN_SIZE = sizeof(int32_t); /*! Constructor */ GenericDataHeaderUpdater(); /*! Destructor */ ~GenericDataHeaderUpdater(); /*! Update the file with the new GenericDataHeader information. * The current implementation just updates the FileId. * @param fileStream An output stream open with std::ios::out|std::ios::binary|std::ios::in. * @param updateHeader A GenericDataHeader with information to update the file. * @param currentHdr The GenericDataHeader object with the information currently in the file. * @return Returns true if the FileId could be udpated. */ bool Update(std::ofstream& fileStream, GenericDataHeader& updateHeader, GenericDataHeader& currentHdr); /*! Update the FileId. Only update the FileId if update and current fileIDs are the same size. * @return Returns true if successful. */ bool UpdateFileId(); /*! Update the parameter list. * Can only update types that have the same name and type and value size in the source and target */ void UpdateParameters(); private: /*! Get the size of the FileHeader in bytes. * @return The size of the FileHeader. */ int32_t GetFileHeaderSize(); /*! Get the size of the GenericDataHeader up to the start of the parameter list. * @return The size of the GenericDataHeader up to the start of the parameter list. */ int32_t GetBytesFromGenericDataHdrStartToParameterList(); private: /*! Open output filestream */ std::ofstream* os; /*! A GenericDataHeader with information to use to update the target*/ GenericDataHeader* updateHdr; /*! 
The GenericDataHeader with current information */ GenericDataHeader* currentHdr; }; } #endif // _GenericDataHeaderUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/GenericDataHeaderWriter.cpp0000644000175200017520000000557114516003651030144 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/GenericDataHeaderWriter.h" // #include "calvin_files/writers/src/FileOutput.h" // using namespace affymetrix_calvin_io; void GenericDataHeaderWriter::Write(std::ofstream &os, GenericDataHeader &g) { WriteFileTypeId(os, g); WriteFileId(os, g); WriteFileCreationTime(os, g); WriteLocale(os, g); WriteNameValParamCnt(os, g); WriteNameValParams(os, g); WriteParentHdrCnt(os, g); WriteParentHdrs(os, g); } void GenericDataHeaderWriter::WriteFileTypeId(std::ofstream &os, const GenericDataHeader &g) const { FileOutput::WriteString8(os, g.GetFileTypeId()); } void GenericDataHeaderWriter::WriteFileId(std::ofstream &os, const GenericDataHeader &g) const { FileOutput::WriteString8(os, g.GetFileId()); } void GenericDataHeaderWriter::WriteFileCreationTime(std::ofstream &os, const GenericDataHeader &g) const { FileOutput::WriteString16(os, 
g.GetFileCreationTime()); } void GenericDataHeaderWriter::WriteLocale(std::ofstream &os, const GenericDataHeader &g) const { FileOutput::WriteString16(os, g.GetLocale()); } void GenericDataHeaderWriter::WriteNameValParamCnt(std::ofstream &os, const GenericDataHeader &g) const { FileOutput::WriteInt32(os, g.GetNameValParamCnt()); } void GenericDataHeaderWriter::WriteNameValParams(std::ofstream &os, GenericDataHeader &g) { ParameterNameValueTypeIt begin; ParameterNameValueTypeIt end; g.GetNameValIterators(begin, end); while(begin != end) { FileOutput::WriteString16(os, begin->GetName()); MIMEValue mv = begin->GetMIMEValue(); u_int32_t sz; const void* ptr = mv.GetValue(sz); FileOutput::WriteBlob(os, ptr, sz); FileOutput::WriteString16(os, begin->GetMIMEType()); begin++; } } void GenericDataHeaderWriter::WriteParentHdrCnt(std::ofstream &os, const GenericDataHeader &g) const { FileOutput::WriteInt32(os, g.GetParentCnt()); } void GenericDataHeaderWriter::WriteParentHdrs(std::ofstream &os, GenericDataHeader &g) { GenDataHdrVectorIt begin; GenDataHdrVectorIt end; g.GetParentIterators(begin, end); while(begin != end) { Write(os, *begin); begin++; } } affxparser/src/fusion/calvin_files/writers/src/GenericDataHeaderWriter.h0000644000175200017520000000371214516003651027604 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericDataHeaderWriter_HEADER_ #define _GenericDataHeaderWriter_HEADER_ #include "calvin_files/data/src/GenericDataHeader.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { class GenericDataHeaderWriter { public: GenericDataHeaderWriter() {} ~GenericDataHeaderWriter() {} void Write(std::ofstream &os, GenericDataHeader &g); private: void WriteFileTypeId(std::ofstream &os, const GenericDataHeader &g) const; void WriteFileId(std::ofstream &os, const GenericDataHeader &g) const; void WriteFileCreationTime(std::ofstream &os, const GenericDataHeader &g) const; void WriteLocale(std::ofstream &os, const GenericDataHeader &g) const; void WriteNameValParamCnt(std::ofstream &os, const GenericDataHeader &dc) const; void WriteNameValParams(std::ofstream &os, GenericDataHeader &g); void WriteParentHdrCnt(std::ofstream &os, const GenericDataHeader &dc) const; void WriteParentHdrs(std::ofstream &os, GenericDataHeader &g); }; } #endif // _GenericDataHeaderWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/GenericFileUpdater.cpp0000644000175200017520000001003214516003651027155 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/GenericFileUpdater.h" // using namespace affymetrix_calvin_io; GenericFileUpdater::GenericFileUpdater(const std::string& path) { fileHdr = ReadFileHeader(path); writer = new GenericFileWriter(&fileHdr, false); lastDataGrpOffset = FindLastDataGroupOffset(); lastDataGrpDataSetCnt = GetLastDataGroupDataSetCnt(); } GenericFileUpdater::~GenericFileUpdater() { delete writer; } DataSetWriter* GenericFileUpdater::GetUpdateDataSetWriter(DataSetHeader& dataSetHdr) { DataSetHeader lastDataSetHdr; if(FindLastDataSetHeader(fileHdr, lastDataSetHdr)) { u_int32_t offset = 0; if(dataSetHdr.GetName().compare(lastDataSetHdr.GetName()) == 0) { //overwrite last dataset offset = lastDataSetHdr.GetHeaderStartFilePos(); } else { //append dataset offset = lastDataSetHdr.GetNextSetFilePos(); //increment data set count IncrementDataSetCnt(); } u_int32_t lastDataGrpIndex = fileHdr.GetDataGroupCnt() - 1; writer->SeekFromBeginPos(offset); return writer->GetDataGroupWriter(lastDataGrpIndex).CreateDataSetWriter(dataSetHdr); } return 0; } //DataSetWriter* GenericFileUpdater::WriteDataSet(GenericFileWriter& writer, u_int32_t grpIndex, u_int32_t offset, DataSetHeader& dataSetHdr, Uint8Vector& data) const //{ // writer.SeekFromBeginPos(offset); // DataSetWriter* dsWriter = writer.GetDataGroupWriter(grpIndex).CreateDataSetWriter(dataSetHdr); // WriteDataSet(dsWriter, dataSetHdr, data); // delete dsWriter; //} //void 
GenericFileUpdater::WriteDataSet(DataSetWriter* writer, DataSetHeader& dataSetHdr, std::string& data) const //{ // writer->WriteHeader(); // for(size_t i = 0; i = data.size(); i++) // { // writer->Write(data[i]); // } // writer->UpdateNextDataSetOffset(); //} FileHeader GenericFileUpdater::ReadFileHeader(const std::string& path) const { GenericData gData; GenericFileReader reader; reader.SetFilename(path); reader.ReadHeader(gData); gData.Close(); return gData.Header(); } bool GenericFileUpdater::FindLastDataSetHeader(FileHeader& fileHdr, DataSetHeader& dataSetHdr) const { int32_t index = fileHdr.GetDataGroupCnt(); if(index > 0) { DataGroupHeader dataGrpHdr = fileHdr.GetDataGroup(index - 1); index = dataGrpHdr.GetDataSetCnt(); if(index > 0) { dataSetHdr = dataGrpHdr.GetDataSet(index - 1); return true; } } return false; } void GenericFileUpdater::IncrementDataSetCnt() { lastDataGrpDataSetCnt++; u_int32_t currentPos = writer->GetFilePos(); writer->SeekFromBeginPos(lastDataGrpOffset + 8); writer->Write(lastDataGrpDataSetCnt); writer->SeekFromBeginPos(currentPos); } u_int32_t GenericFileUpdater::GetLastDataGroupDataSetCnt() { DataGroupHdrIt begin; DataGroupHdrIt end; fileHdr.GetDataGroupIts(begin, end); while(begin != end) { if(begin->GetNextGroupPos() == 0) { return begin->GetDataSetCnt(); } begin++; } return 0; } u_int32_t GenericFileUpdater::FindLastDataGroupOffset() { DataGroupHdrIt begin; DataGroupHdrIt end; fileHdr.GetDataGroupIts(begin, end); u_int32_t currentOffset = fileHdr.GetFirstDataGroupFilePos(); while(begin != end) { if(begin->GetNextGroupPos() == 0) { break; } else { currentOffset = begin->GetNextGroupPos(); } begin++; } return currentOffset; } affxparser/src/fusion/calvin_files/writers/src/GenericFileUpdater.h0000644000175200017520000000365314516003651026635 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericFileUpdater_HEADER_ #define _GenericFileUpdater_HEADER_ #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/parsers/src/GenericFileReader.h" #include "calvin_files/writers/src/GenericFileWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { class GenericFileUpdater { private: GenericFileWriter* writer; FileHeader fileHdr; u_int32_t lastDataGrpOffset; u_int32_t lastDataGrpDataSetCnt; public: GenericFileUpdater(const std::string& path); ~GenericFileUpdater(); DataSetWriter* GetUpdateDataSetWriter(DataSetHeader& dataSetHdr); void IncrementDataSetCnt(); private: void AppendDataSet(DataSetHeader& hdr, Uint8Vector& data, u_int32_t offset) const; bool FindLastDataSetHeader(FileHeader& fileHdr, DataSetHeader& dataSetHdr) const; FileHeader ReadFileHeader(const std::string& path) const; u_int32_t FindLastDataGroupOffset(); u_int32_t GetLastDataGroupDataSetCnt(); }; } #endif // _GenericFileUpdater_HEADER_ affxparser/src/fusion/calvin_files/writers/src/GenericFileWriter.cpp0000644000175200017520000000610214516003651027030 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/GenericFileWriter.h" #include "calvin_files/utils/src/StringUtils.h" // #include "util/Fs.h" // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_utilities; GenericFileWriter::GenericFileWriter(FileHeader* p, bool truncate) { fileHdr = p; OpenFileOStream(fileHdr->GetFilename(), truncate); CreateWriters(); } GenericFileWriter::~GenericFileWriter() { writers.clear(); os.flush(); os.close(); } int32_t GenericFileWriter::GetFilePos() { return os.tellp(); } void GenericFileWriter::SeekFromCurrentPos(int32_t offset) { os.seekp(offset, std::ios::cur); } void GenericFileWriter::SeekFromBeginPos(int32_t offset) { os.seekp(offset, std::ios::beg); } void GenericFileWriter::SeekFromEndPos(int32_t offset) { os.seekp(offset, std::ios::end); } void GenericFileWriter::WriteHeader() { FileHeaderWriter writer; writer.Write(os, *fileHdr); writer.UpdateDataGroupOffset(os, os.tellp()); } DataGroupWriter& GenericFileWriter::GetDataGroupWriter(int32_t index) { return writers[index]; } int32_t GenericFileWriter::GetDataGroupCnt() { return (int32_t)writers.size(); } void GenericFileWriter::GetDataGroupWriters(DataGroupWriterIt &begin, DataGroupWriterIt &end) { begin = writers.begin(); end = writers.end(); } void GenericFileWriter::OpenFileOStream(const std::string &file, bool truncate) { if(truncate) { Fs::aptOpen(os, file, std::ios::out|std::ios::trunc|std::ios::binary); } else { Fs::aptOpen(os,file, std::ios::out|std::ios::binary|std::ios::in); } if (!os.is_open() && !os.good()) { affymetrix_calvin_exceptions::CalvinException e(L"Could not open file \'" + StringUtils::ConvertMBSToWCS(file) + L"\'", -1); throw e; } } void 
GenericFileWriter::CreateWriters() { int sz = fileHdr->GetDataGroupCnt(); for(int i = 0; i < sz; i++) { DataGroupWriter* p = CreateDataGroupWriter(fileHdr->GetDataGroup(i)); writers.push_back(*p); delete p; } } DataGroupWriter* GenericFileWriter::CreateDataGroupWriter(DataGroupHeader& hdr) { return new DataGroupWriter(&os, &hdr); } void GenericFileWriter::Write(u_int32_t p) { FileOutput::WriteUInt32(os, p); } affxparser/src/fusion/calvin_files/writers/src/GenericFileWriter.h0000644000175200017520000000425414516003651026503 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GenericFileWriter_HEADER_ #define _GenericFileWriter_HEADER_ #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/writers/src/DataGroupHeaderWriter.h" #include "calvin_files/writers/src/DataGroupWriter.h" #include "calvin_files/writers/src/FileHeaderWriter.h" #include "calvin_files/writers/src/FileOutput.h" #include "calvin_files/writers/src/FileWriteException.h" // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { class GenericFileWriter { private: std::ofstream os; FileHeader* fileHdr; DataGroupWriterVector writers; public: GenericFileWriter(FileHeader* p, bool truncate=true); ~GenericFileWriter(); void WriteHeader(); void GetDataGroupWriters(DataGroupWriterIt &begin, DataGroupWriterIt &end); DataGroupWriter& GetDataGroupWriter(int32_t index); int32_t GetDataGroupCnt(); int32_t GetFilePos(); void SeekFromCurrentPos(int32_t offset); void SeekFromBeginPos(int32_t offset); void SeekFromEndPos(int32_t offset); DataGroupWriter* CreateDataGroupWriter(DataGroupHeader& hdr); void Write(u_int32_t p); std::ofstream& GetFileOStream() { return os; } private: void OpenFileOStream(const std::string &filename, bool truncate); void CreateWriters(); }; } #endif // _GenericFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/GridControlFileWriter.cpp0000644000175200017520000001121314516003651027701 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/GridControlFileWriter.h" // #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/parameter/src/Parameter.h" #include "calvin_files/parsers/src/GridControlFileReader.h" #include "calvin_files/writers/src/GenericFileWriter.h" // using namespace affymetrix_calvin_io; using namespace std; using namespace affymetrix_calvin_parameter; using namespace affymetrix_grid_control; /* * Initialize the members. */ GridControlFileWriter::GridControlFileWriter() { } /* * Clean up any used memory. */ GridControlFileWriter::~GridControlFileWriter() { } /* * Write the grid data to the GRC file. */ bool GridControlFileWriter::Write(const string &fileName, GridControlData &data) { try { // Create a header object. FileHeader header; header.SetFilename(fileName); // Set the file type. GenericDataHeader *genericHeader = header.GetGenericDataHdr(); genericHeader->SetFileId(GRD_FILE_TYPE_IDENTIFIER); genericHeader->SetFileTypeId(GRD_FILE_TYPE_IDENTIFIER); // Add the parameters ParameterNameValueType param; param.SetName(GRD_ROWS_PARAMETER_NAME); param.SetValueInt32(data.GetRows()); genericHeader->AddNameValParam(param); param.SetName(GRD_COLUMNS_PARAMETER_NAME); param.SetValueInt32(data.GetColumns()); genericHeader->AddNameValParam(param); // Cube objects DataGroupHeader dataGroupHeader; DataSetHeader dataSetHeader; // Create the data dataGroup. 
dataGroupHeader.SetName(GRD_FILE_COORDINATE_GROUP_NAME); // Create the B1 dataSet dataSetHeader.SetName(GRD_FILE_B1_SET_NAME); dataSetHeader.AddUShortColumn(L""); // TBD: add column name dataSetHeader.AddUShortColumn(L""); dataSetHeader.SetRowCnt(data.GetNumB1Probes()); dataGroupHeader.AddDataSetHdr(dataSetHeader); dataSetHeader.Clear(); // Create the B2 dataSet dataSetHeader.SetName(GRD_FILE_B2_SET_NAME); dataSetHeader.AddUShortColumn(L""); dataSetHeader.AddUShortColumn(L""); dataSetHeader.SetRowCnt(data.GetNumB2Probes()); dataGroupHeader.AddDataSetHdr(dataSetHeader); dataSetHeader.Clear(); // Create the NS dataSet dataSetHeader.SetName(GRD_FILE_NS_SET_NAME); dataSetHeader.AddUShortColumn(L""); dataSetHeader.AddUShortColumn(L""); dataSetHeader.SetRowCnt(data.GetNumNSProbes()); dataGroupHeader.AddDataSetHdr(dataSetHeader); dataSetHeader.Clear(); // Add the dataGroup. header.AddDataGroupHdr(dataGroupHeader); // Write the file. GenericFileWriter writer(&header); // First the header. writer.WriteHeader(); // Next the dataGroup header DataGroupWriter &dataGroupWriter = writer.GetDataGroupWriter(0); dataGroupWriter.WriteHeader(); // Next the B1 dataSet DataSetWriter &dataSetWriter = dataGroupWriter.GetDataSetWriter(0); dataSetWriter.WriteHeader(); int nProbes = data.GetNumB1Probes(); u_int16_t coordVal; for (int i=0; i #include #include // namespace affymetrix_calvin_io { /*! This class provide write capabilities for GRC files. */ class GridControlFileWriter { public: /*! Constructor */ GridControlFileWriter(); /*! Destructor */ ~GridControlFileWriter(); /*! Write a new GRC file. * @param fileName The name of the GRC file. * @param data The grid control data to write to the file. * @return True if successful. 
*/ bool Write(const std::string &fileName, affymetrix_grid_control::GridControlData &data); }; }; #endif // _GridControlFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/MMDATFileWriter.cpp0000644000175200017520000001123614516003651026322 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/MMDATFileWriter.h" // #include "calvin_files/data/src/DATData.h" // /* General Description of the file writing process * The MMDATFileWriter extends the DATFileWriter by adding memory-mapped view * to the pixel DataSet data. * The DATFileWriter constructor writes the FileHeader, GenericDataHeader, DataGroupHeader * and DataSetHeader using fstream. The header positions are computed and written in the proper * place at construction. * The Open method opens a memory mapped view on the pixel DataSet data section. Data can be * written to the file by changing the data in the memory mapped view. The view can also * be moved over the pixel data to access all the data. The view size is restricted to * 200 MB at a time. * Closing the file will also cause the pixel data to be written to the file in network byte-order. 
* Warning: Using the same DATData object to re-open the file with the MMDATFileWriter would make it * possible to access the pixel data that is not in host byte-order. You do not want to do this. * Once the MMDATFileWriter is closed, the file should be opened with the DATFileReader to access * the data. Re-opening a file with the MMDATFileWriter to overwrite an existing file is OK. */ using namespace affymetrix_calvin_io; /* * Constructor */ MMDATFileWriter::MMDATFileWriter(DATData &p) : DATFileWriter(p) { pixelWriter = 0; filename = p.GetFilename(); dataSetHdr = p.GetFileHeader()->GetDataGroup(0).FindDataSetHeader(DAT_PIXEL); rows = p.GetRows(); cols = p.GetCols(); } /* * Destructor */ MMDATFileWriter::~MMDATFileWriter() { Close(); } /* * Opens the writer. */ bool MMDATFileWriter::Open() { if (pixelWriter == 0) { if (dataSetHdr != 0) { pixelWriter = new MMDataSetDataWriter(*dataSetHdr, filename); } else return false; } if (pixelWriter) return pixelWriter->Open(); else return false; } /* * Closes the writer. */ bool MMDATFileWriter::Close() { if (pixelWriter) { HTONPixelDataSet(); bool ret = pixelWriter->Close(); delete pixelWriter; pixelWriter = 0; return ret; } else return true; } /* * Maps the pixel data for access starting at firstRow and continuing for rowsToMap. */ void MMDATFileWriter::MapPixelData(int32_t firstRow, int32_t rowsToMap) { if (pixelWriter) { pixelWriter->MapData(firstRow*cols, rowsToMap*cols); } } /* * Get a pointer to the pixel data that has been mapped for access. */ u_int16_t* MMDATFileWriter::GetMappedPixelDataPtr() { if (pixelWriter) { return (u_int16_t*)pixelWriter->GetMappedDataPtr(); } return 0; } /* * Get the first DAT image row that has been mapped. */ int32_t MMDATFileWriter::GetFirstPixelRowMapped() { if (pixelWriter) { return pixelWriter->GetFirstRowMapped()/cols; } return 0; } /* * Get the number of DAT image rows that have been mapped. 
*/ int32_t MMDATFileWriter::GetPixelRowsMapped() { if (pixelWriter) { return pixelWriter->GetRowsMapped()/cols; } return 0; } /* * Get the suggested maximum number of DAT image rows to map. */ int32_t MMDATFileWriter::GetMaxPixelRowsToMap() { if (pixelWriter) { return pixelWriter->GetMaxRowsToMap()/cols; } return 0; } /* * Writes the pixel DataSet data in network byte order. */ void MMDATFileWriter::HTONPixelDataSet() { if (pixelWriter) { pixelWriter->Close(); if (pixelWriter->Open()) { bool stop = false; do { int32_t firstPixel = pixelWriter->GetFirstRowMapped(); int32_t numPixelsMapped = pixelWriter->GetRowsMapped(); u_int16_t* pixel = (u_int16_t*)pixelWriter->GetMappedDataPtr(); for (int32_t i = 0; i < numPixelsMapped; ++i, ++pixel) { *pixel = htons(*pixel); } if (firstPixel+numPixelsMapped >= rows*cols) stop = true; else pixelWriter->MapData(firstPixel+numPixelsMapped, pixelWriter->GetMaxRowsToMap()); } while (stop == false); } } } affxparser/src/fusion/calvin_files/writers/src/MMDATFileWriter.h0000644000175200017520000001040214516003651025761 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _MMDATFileWriter_HEADER_ #define _MMDATFileWriter_HEADER_ /*! \file MMDATFileWriter.h This file defines a class that writes a DAT file. The pixel DataSet data * is written to the file using Windows memory-mapping. */ #include "calvin_files/data/src/DATData.h" #include "calvin_files/writers/src/DATFileWriter.h" #include "calvin_files/writers/src/GenericFileWriter.h" #include "calvin_files/writers/src/MMDataSetDataWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class writes a DAT file. The pixel DataSet data is written using Windows memory-mapping */ class MMDATFileWriter : public DATFileWriter { public: /*! Constructor * Creates the DAT file and writes header sections of the file. * @param p The DATData object to write to the file. */ MMDATFileWriter(DATData &p); /*! Destructor */ ~MMDATFileWriter(); public: /*! Opens the writer. * This opens the file and a memory-mapped view of the pixel DataSet. * It closes a previously opened file. * @return true if it succeeded. Method may also throw exceptions. * @exception affymetrix_calvin_exceptions::FileCreateException */ bool Open(); /*! Closes the writer. * This writes the pixel data to the file in network order and closes the file. * Once the writer is closed it should not be used to open and read the pixel data, overwritting existing pixel * data is OK. * @return true if it succeeded. Method may also throw exceptions. */ bool Close(); public: /*! Maps the pixel data for access starting at firstRow and continuing for rowsToMap. 
* The number of pixels mapped is the number of row mapped times the number of pixels per row (DATData::GetCols). * @param firstRow First row of the DAT image to map for access. * @param rowsToMap Requested number of DAT image rows to map. Use GetRowsMapped for number of rows actually mapped. */ void MapPixelData(int32_t firstRow, int32_t rowsToMap); /*! Get a pointer to the pixel data that has been mapped for access. * @return Pointer to the mapped data. */ u_int16_t* GetMappedPixelDataPtr(); /*! Get the first DAT image row that has been mapped. * @return Index of the first DAT image row mapped for access. */ int32_t GetFirstPixelRowMapped(); /*! Get the number of DAT image rows that have been mapped. * The number of pixels mapped is the number of row mapped times the number of pixels per row (DATData::GetCols). * @return The number of DAT image rows mapped for access. */ int32_t GetPixelRowsMapped(); /*! Get the suggested maximum number of DAT image rows to map. * @return The suggested maximum number of DAT image rows to map */ int32_t GetMaxPixelRowsToMap(); private: virtual void WritePixels(const Uint16Vector &v) {} // hide this method /*! Writes the pixel DataSet data in network byte order. * Once this method is called this writer should not be used * to read the pixel DataSet data. */ void HTONPixelDataSet(); private: /*! Memory-mapped DataSet data writer */ MMDataSetDataWriter* pixelWriter; /*! DAT file name */ std::string filename; /*! Pointer to the pixel DataSetHeader */ affymetrix_calvin_io::DataSetHeader* dataSetHdr; /*! Number of rows of pixels in the DAT image */ int32_t rows; /*! Number of columns of pixels in the DAT image */ int32_t cols; }; } #endif // _MMDATFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/MMDataSetDataWriter.cpp0000644000175200017520000000645414516003651027237 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/MMDataSetDataWriter.h" // #include "calvin_files/writers/src/DataSetHeaderWriter.h" #include "calvin_files/writers/src/FileOutput.h" #include "calvin_files/writers/src/FileWriteException.h" // using namespace affymetrix_calvin_io; /*! maximum file mapping size = 200 MB */ const int32_t MAXFILEMAPSIZE = 1024*1024*200; /* * Constructor */ MMDataSetDataWriter::MMDataSetDataWriter(DataSetHeader& dsHdr, const std::string& name) : dataSetHeader(dsHdr), filename(name) { dataSetDataSize = dataSetHeader.GetDataSize(); bytesPerRow = dataSetHeader.GetRowSize(); } /* * Destructor */ MMDataSetDataWriter::~MMDataSetDataWriter() { Close(); } /* * Opens the writer. */ bool MMDataSetDataWriter::Open() { Close(); return OpenData(); } /* * Open a memory-mapped view of the DataSet data section. 
*/ bool MMDataSetDataWriter::OpenData() { if (dataSetHeader.GetRowCnt() == 0 || dataSetHeader.GetColumnCnt() == 0) { return false; } int32_t rowsToMap = GetMaxRowsToMap(); if (rowsToMap > dataSetHeader.GetRowCnt()) rowsToMap = dataSetHeader.GetRowCnt(); mmfile.SetFilename(filename); if (mmfile.Open(READ_AND_WRITE, ALLOWREAD_AND_WRITE) && mmfile.MapData(dataSetHeader.GetDataStartFilePos(), rowsToMap*bytesPerRow)) { } else { return false; } return true; } /* * Closes the writer. */ bool MMDataSetDataWriter::Close() { mmfile.Close(); return true; } /* * Get the suggested maximum number of DataSet rows to map. */ int32_t MMDataSetDataWriter::GetMaxRowsToMap() { if (bytesPerRow == 0) return 0; return MAXFILEMAPSIZE / bytesPerRow; } /* * Maps the pixel data for access starting at firstRow and continuing for rowsToMap. */ bool MMDataSetDataWriter::MapData(int32_t firstRow, int32_t rowsToMap) { if (firstRow + rowsToMap > dataSetHeader.GetRowCnt()) rowsToMap = dataSetHeader.GetRowCnt() - firstRow; return mmfile.MapData(firstRow*bytesPerRow+dataSetHeader.GetDataStartFilePos(), rowsToMap*bytesPerRow); } /* * Get a pointer to the data that has been mapped for access. */ char* MMDataSetDataWriter::GetMappedDataPtr() { return mmfile.GetDataPtr(); } /* * Get the first DataSet row that has been mapped. */ int32_t MMDataSetDataWriter::GetFirstRowMapped() { return ( (mmfile.GetFirstMappedBytePos()-dataSetHeader.GetDataStartFilePos())/bytesPerRow ); } /* * Get the number of DataSet rows that have been mapped. */ int32_t MMDataSetDataWriter::GetRowsMapped() { return ( mmfile.GetBytesMapped()/bytesPerRow ); } affxparser/src/fusion/calvin_files/writers/src/MMDataSetDataWriter.h0000644000175200017520000000742114516003651026677 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _MMDataSetDataWriter_HEADER_ #define _MMDataSetDataWriter_HEADER_ /*! \file MMDataSetDataWriter.h This file defines a class that writes the data section of a DataSet using Windows memory-mapping. */ #include "calvin_files/data/src/DataSetHeader.h" #include "calvin_files/data/src/MemMapFile.h" #include "calvin_files/writers/src/DataSetHeaderWriter.h" // #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class writes a DataSet data. The DataSet data is written to the file using memory-mapping. * This class assumes that the DataSetHeaderWriter has already written the DataSetHeader * to the file or more accurately, that the file has already been created by the GenericFileWriter. */ class MMDataSetDataWriter { public: /*! Constructor * @param dsHdr The DataSetHeader object that is to be written to the file. * @param name DAT file name. */ MMDataSetDataWriter(DataSetHeader& dsHdr, const std::string& name); /*! Destructor */ ~MMDataSetDataWriter(); public: /*! Opens the writer. * It closes the previously opened view and opens a memory-mapped view of the first section of the DataSet. * @return true if it succeeded. 
Method may also throw exceptions. * @exception affymetrix_calvin_exceptions::FileCreateException */ bool Open(); /*! Closes the writer. * This closes the memory-mapped view. * @return true if it succeeded. Method may also throw exceptions. */ bool Close(); /*! Maps the pixel data for access starting at firstRow and continuing for rowsToMap. * @param firstRow First DataSet row to map for access. * @param rowsToMap Requested number of DataSet rows to map. Use GetRowsMapped for number of rows actually mapped. * @return True if successful. */ bool MapData(int32_t firstRow, int32_t rowsToMap); /*! Get a pointer to the data that has been mapped for access. * @return Pointer to the mapped data. */ char* GetMappedDataPtr(); /*! Get the first DataSet row that has been mapped. * @return Index of the first DataSet row mapped for access. */ int32_t GetFirstRowMapped(); /*! Get the number of DataSet rows that have been mapped. * @return The number of DataSet rows mapped for access. */ int32_t GetRowsMapped(); /*! Get the suggested maximum number of DataSet rows to map. * @return The suggested maximum number of DataSet rows to map. */ int32_t GetMaxRowsToMap(); protected: /*! Open a memory-mapped view of the DataSet data section. * @return True if successful. */ bool OpenData(); private: /*! Windows memory-mapped file access */ MemMapFile mmfile; /*! DataSetHeader */ DataSetHeader& dataSetHeader; /*! Name of the file to memory map */ std::string filename; /*! Size of the data section of the DataSet */ int32_t dataSetDataSize; /*! Bytes per DataSet row */ int32_t bytesPerRow; }; } #endif // _MMDataSetDataWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/MultiChannelCelFileCollater.cpp0000644000175200017520000001312214516003651030754 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/MultiChannelCelFileCollater.h" using namespace affymetrix_calvin_io; /* * Default constructor */ MultiChannelCelFileCollater::MultiChannelCelFileCollater() { wavelength = NO_WAVELENGTH; } /* * Destructor */ MultiChannelCelFileCollater::~MultiChannelCelFileCollater() { } void MultiChannelCelFileCollater::OpenReader(const std::string& file, GenericData& data) { data.Clear(); GenericFileReader reader; reader.SetFilename(file); reader.Open(data); reader.Close(); } void MultiChannelCelFileCollater::Collate(const StringVector& celFiles, const std::string& outFile) { FileHeader outFileHdr; outFileHdr.SetFilename(outFile); CopyHeaders(celFiles, outFileHdr); GenericFileWriter writer(&outFileHdr); writer.WriteHeader(); WriteDataGroups(celFiles, writer); } void MultiChannelCelFileCollater::CopyHeaders(const StringVector& celFiles, FileHeader& outFileHdr) { //read the first cel file header GenericData gReader; OpenReader(celFiles[0], gReader); CopyFileHdr(gReader.Header(), outFileHdr); DataGroupHeader dstGrp; CopyDataGroupHdr(gReader.Header().GetDataGroup(0), dstGrp); outFileHdr.AddDataGroupHdr(dstGrp); for(u_int32_t i = 1; i < celFiles.size(); i++) { //read the remaining cel file headers OpenReader(celFiles[i], 
gReader); DataGroupHeader dstGrp; UpdateWavelength(gReader.Header().GetGenericDataHdr()); CopyDataGroupHdr(gReader.Header().GetDataGroup(0), dstGrp); outFileHdr.AddDataGroupHdr(dstGrp); } gReader.Clear(); } void MultiChannelCelFileCollater::UpdateWavelength(GenericDataHeader* hdr) { ParameterNameValueType p; if(hdr->FindNameValParam(AFFY_FILTER_WAVELENGTH, p)) { wavelength = p.GetValueText(); } else { wavelength = NO_WAVELENGTH; } } void MultiChannelCelFileCollater::WriteDataGroups(const StringVector& celFiles, GenericFileWriter& writer) { int dataGroupCnt = writer.GetDataGroupCnt(); for(int i = 0; i < dataGroupCnt; i++) { GenericFileReader reader; GenericData data; reader.SetFilename(celFiles[i]); reader.Open(data); DataGroupWriter grpWriter = writer.GetDataGroupWriter(i); DataGroupReader dgReader = reader.GetDataGroupReader(0); WriteDataGroup(dgReader, data.Header().GetDataGroup(0), grpWriter, i >= (dataGroupCnt - 1)); } } void MultiChannelCelFileCollater::WriteDataGroup(DataGroupReader& grpReader, DataGroupHeader& grpHdr, DataGroupWriter& grpWriter, bool lastGroup) { grpWriter.WriteHeader(); for(int i = 0; i < grpReader.GetDataSetCnt(); i++) { DataSetReader reader = grpReader.GetDataSetReader(i); WriteDataSet(reader, grpHdr.GetDataSet(i), grpWriter.GetDataSetWriter(i)); } if (!lastGroup) { grpWriter.UpdateNextDataGroupPos(); } } void MultiChannelCelFileCollater::WriteDataSet(DataSetReader& setReader, DataSetHeader& setHdr, DataSetWriter& setWriter) { setWriter.WriteHeader(); int sz = setHdr.GetDataSize(); char* buffer = new char[sz]; int total = 0; while(total < sz) { int count = setReader.ReadBuffer(buffer, sz); setWriter.WriteBuffer(buffer, count); total += count; } setWriter.UpdateNextDataSetOffset(); } void MultiChannelCelFileCollater::CopyFileHdr(FileHeader& src, FileHeader& dst) { CopyParentHeaders(src.GetGenericDataHdr(), dst.GetGenericDataHdr()); CopyGenericHeader(src.GetGenericDataHdr(), dst.GetGenericDataHdr()); } void 
MultiChannelCelFileCollater::CopyParentHeaders(GenericDataHeader* src, GenericDataHeader* dst) { GenDataHdrVectorIt begin, end; src->GetParentIterators(begin, end); while(begin != end) { dst->AddParent(*begin); begin++; } } void MultiChannelCelFileCollater::CopyGenericHeader(GenericDataHeader* src, GenericDataHeader* dst) { dst->SetFileTypeId(MULTI_CHANNEL_FILE_ID); CopyGenericHeaderParams(src, dst); } void MultiChannelCelFileCollater::CopyGenericHeaderParams(GenericDataHeader* src, GenericDataHeader* dst) { ParameterNameValueTypeIt begin, end; src->GetNameValIterators(begin, end); while(begin != end) { if(begin->GetName() == AFFY_FILTER_WAVELENGTH) { wavelength = begin->GetValueText(); } else { dst->AddNameValParam(*begin); } begin++; } } void MultiChannelCelFileCollater::CopyDataGroupHdr(DataGroupHeader& src, DataGroupHeader& dst) { dst.SetName(wavelength); DataSetHdrIt begin, end; src.GetDataSetIterators(begin, end); while(begin != end) { DataSetHeader dstSet; CopyDataSetHdr(*begin, dstSet); dst.AddDataSetHdr(dstSet); begin++; } } void MultiChannelCelFileCollater::CopyDataSetHdr(DataSetHeader& src, DataSetHeader& dst) { dst.SetName(src.GetName()); dst.SetRowCnt(src.GetRowCnt()); for(int i = 0; i < src.GetColumnCnt(); i++) { dst.AddColumn(src.GetColumnInfo(i)); } ParameterNameValueTypeIt begin, end; src.GetNameValIterators(begin, end); while(begin != end) { dst.AddNameValParam(*begin); begin++; } } affxparser/src/fusion/calvin_files/writers/src/MultiChannelCelFileCollater.h0000644000175200017520000000546414516003651030433 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _MultiChannelCelFileCollater_HDR_ #define _MultiChannelCelFileCollater_HDR_ #include #include "calvin_files/portability/src/AffymetrixBaseTypes.h" #include "calvin_files/utils/src/AffyStlCollectionTypes.h" #include "calvin_files/data/src/FileHeader.h" #include "calvin_files/data/src/GenericData.h" #include "calvin_files/parameter/src/ParameterNameValueType.h" #include "calvin_files/writers/src/GenericFileWriter.h" #include "calvin_files/parsers/src/GenericFileReader.h" #include "calvin_files/writers/src/CombinedDatConstants.h" #ifdef _MSC_VER #pragma warning(disable: 4290) // don't show warnings about throw keyword on function declarations. 
#endif namespace affymetrix_calvin_io { #define NO_WAVELENGTH L"wavelength unavailable" #define MULTI_CHANNEL_FILE_ID "affymetrix-calvin-multi-intensity" class MultiChannelCelFileCollater { private: std::wstring wavelength; void CopyHeaders(const std::vector& celFiles, FileHeader& outFileHdr); void CopyDataSetHdr(DataSetHeader& src, DataSetHeader& dst); void CopyParentHeaders(GenericDataHeader* src, GenericDataHeader* dst); void CopyGenericHeader(GenericDataHeader* src, GenericDataHeader* dst); void CopyGenericHeaderParams(GenericDataHeader* src, GenericDataHeader* dst); void WriteDataGroups(const StringVector& celFiles, GenericFileWriter& writer); void UpdateWavelength(GenericDataHeader* hdr); protected: void CopyDataGroupHdr(DataGroupHeader& src, DataGroupHeader& dst); void CopyFileHdr(FileHeader& src, FileHeader& dst); void OpenReader(const std::string& file, GenericData& data); void WriteDataGroup(DataGroupReader& gReader, DataGroupHeader& fileHdr, DataGroupWriter& writer, bool lastGroup); void WriteDataSet(DataSetReader& setReader, DataSetHeader& setHdr, DataSetWriter& setWriter); public: MultiChannelCelFileCollater(); ~MultiChannelCelFileCollater(); void Collate(const StringVector& celFiles, const std::string& outFile); }; } #endif // _MultiChannelCelFileCollater_HDR_ affxparser/src/fusion/calvin_files/writers/src/TemplateFileWriter.cpp0000644000175200017520000002231214516003651027230 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/TemplateFileWriter.h" // #include "calvin_files/parsers/src/SAXTemplateHandlers.h" #include "calvin_files/template/src/TemplateData.h" #include "calvin_files/template/src/TemplateId.h" #include "calvin_files/utils/src/AffymetrixGuid.h" // #include #include #include #include #include #include // #include #include #include #include #include // //#include using namespace affymetrix_calvin_template; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; XERCES_CPP_NAMESPACE_USE #ifdef _MSC_VER #pragma warning(disable: 4996) // ignore deprecated functions warning #endif /*! The expected version number */ #define TEMPLATE_SET_FILE_VERSION_NUMBER "1.0" /*! This class provides utilities for converting native strings to XML strings. * This class is provided for platforms where the XMLCh is not a wchar_t (Mac OSX) */ class XMLChConversion { private: /*! The XML string */ XMLCh *str; /*! converts an int. * @param i The int. */ void convert(int i) { char cstr[64]; sprintf(cstr, "%d", i); convert(cstr); } /*! Converts a 8 bit string. * @param s The 8 bit string. 
*/ void convert(const char *const s) { clear(); int n=(int)strlen(s); str = new XMLCh[n+1]; for (int i=0; icreateElement(ToXMLCh(TEMPLATE_FILE_ELEMENT)); arrayElement->setAttribute(ToXMLCh(TEMPLATE_FILE_ELEMENT_TYPE_ATTRIBUTE), ToXMLCh(TEMPLATE_FILE_TYPE_IDENTIFIER)); arrayElement->setAttribute(ToXMLCh(TEMPLATE_FILE_ELEMENT_VERSION_ATTRIBUTE), ToXMLCh(TEMPLATE_SET_FILE_VERSION_NUMBER)); arrayElement->setAttribute(ToXMLCh(TEMPLATE_FILE_ELEMENT_ID_ATTRIBUTE), ToXMLCh(templateData.TemplateFileIdentifier())); arrayElement->setAttribute(ToXMLCh(TEMPLATE_FILE_ELEMENT_CREATE_DATE_TIME_ATTRIBUTE), ToXMLCh(templateData.CreationDateTime())); arrayElement->setAttribute(ToXMLCh(TEMPLATE_FILE_ELEMENT_CREATED_BY_ATTRIBUTE), ToXMLCh(templateData.CreatedBy())); return arrayElement; } /* * Add the user attributes to the document if they exist. */ void AddUserAttributes(TemplateData &templateData, DOMDocument* doc, DOMElement* arrayElement) { int nUserAttributes = (int) templateData.UserAttributes().size(); if (nUserAttributes > 0) { DOMElement* userAttributesElement = doc->createElement(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ELEMENT)); affymetrix_calvin_parameter::ParameterNameValueDefaultRequiredTypeList::iterator paramIt; for (paramIt=templateData.UserAttributes().begin(); paramIt!=templateData.UserAttributes().end(); ++ paramIt) { ParameterNameValueDefaultRequiredType ¶m = *paramIt; DOMElement* paramElement = doc->createElement(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT)); paramElement->setAttribute(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_NAME_ATTRIBUTE), ToXMLCh(param.GetName())); paramElement->setAttribute(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_TYPE_ATTRIBUTE), ToXMLCh( ParameterNameValueDefaultRequiredType::ParameterValueTypeToString(param.ValueType()))); if (param.RequiredFlag() == true) paramElement->setAttribute(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_REQUIRED_ATTRIBUTE), ToXMLCh("Yes")); if (param.HasDefault() == true) 
paramElement->setAttribute(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_ELEMENT_DEFAULT_ATTRIBUTE), ToXMLCh(param.DefaultToString())); if (param.ValueType() == ParameterNameValueDefaultRequiredType::ControlMultiParameterType) { std::list::iterator valIt; for (valIt = param.ControlMultiValues().begin(); valIt != param.ControlMultiValues().end(); ++valIt) { DOMElement *valueElement = doc->createElement(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT)); valueElement->setTextContent(ToXMLCh(*valIt)); paramElement->appendChild(valueElement); } } else { DOMElement *valueElement = doc->createElement(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_VALUE_ELEMENT)); valueElement->setTextContent(ToXMLCh(param.DefaultToString())); paramElement->appendChild(valueElement); } int nControl = (int) param.ControlledVocabulary().size(); if (nControl > 0) { std::list::iterator controlIt; for (controlIt=param.ControlledVocabulary().begin(); controlIt!=param.ControlledVocabulary().end(); ++controlIt) { DOMElement* controlElement = doc->createElement(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT)); controlElement->setAttribute(ToXMLCh(TEMPLATE_USER_ATTRIBUTES_ATTRIBUTE_CONTROL_ELEMENT_VALUE_ATTRIBUTE), ToXMLCh(*controlIt)); paramElement->appendChild(controlElement); } } userAttributesElement->appendChild(paramElement); } arrayElement->appendChild(userAttributesElement); } } /* * Initialize the class. */ TemplateFileWriter::TemplateFileWriter() { dataTypeIdentifier = TEMPLATE_FILE_TYPE_IDENTIFIER; } /* * Clear the data. */ TemplateFileWriter::~TemplateFileWriter() { } /* * Write the entire file, the header and body. */ bool TemplateFileWriter::Write(const std::string &fileName, affymetrix_calvin_template::TemplateData &templateData) { // Initialize the XML4C2 system. try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } // Create a DOM implementation object and create the document type for it. 
DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(ToXMLCh(L"LS")); //DOMDocumentType* dt = impl->createDocumentType(ToXMLCh(TEMPLATE_FILE_ELEMENT), 0, ToXMLCh(TEMPLATE_FILE_DTD)); DOMDocument* doc = impl->createDocument(); //doc->setStandalone(true); //doc->appendChild(dt); // Create the serializer. DOMLSSerializer *theSerializer = ((DOMImplementationLS*)impl)->createLSSerializer(); DOMLSOutput *theOutputDesc = ((DOMImplementationLS*)impl)->createLSOutput(); //theSerializer->setEncoding(ToXMLCh(TEMPLATE_FILE_ENCODING)); theOutputDesc->setEncoding(ToXMLCh(TEMPLATE_FILE_ENCODING)); // TemplateFile element DOMElement* templateElement = CreateTemplateElement(templateData, doc, dataTypeIdentifier); // UserAttributes element AddUserAttributes(templateData, doc, templateElement); // Add the template element to the document. doc->appendChild(templateElement); // Write the file. bool status = false; XMLFormatTarget *myFormTarget = new LocalFileFormatTarget(fileName.c_str()); theOutputDesc->setByteStream(myFormTarget); try { theSerializer->write(doc, theOutputDesc); status = true; } catch (...) { status = false; } // Clean up doc->release(); theOutputDesc->release(); theSerializer->release(); delete myFormTarget; XMLPlatformUtils::Terminate(); return status; } affxparser/src/fusion/calvin_files/writers/src/TemplateFileWriter.h0000644000175200017520000000345014516003651026677 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TemplateFileWriter_HEADER_ #define _TemplateFileWriter_HEADER_ /*! \file TemplateFileWriter.h This file provides interfaces to write a template file. */ #include "calvin_files/template/src/TemplateData.h" // namespace affymetrix_calvin_io { /*! This class provides interfaces to write a template file. */ class TemplateFileWriter { public: /*! Constructor */ TemplateFileWriter(); /*! Destructor */ ~TemplateFileWriter(); protected: /*! An identifier to the type of data stored in the file */ affymetrix_calvin_utilities::AffymetrixGuidType dataTypeIdentifier; public: /*! Writes the template object to an template file. * * @param fileName The name of the template file to write. * @param templateData The template data to write to the file. * @return True if the file was successfully written. */ bool Write(const std::string &fileName, affymetrix_calvin_template::TemplateData &templateData); }; }; #endif // _TemplateFileWriter_HEADER_ affxparser/src/fusion/calvin_files/writers/src/TextFileWriter.cpp0000644000175200017520000000461514516003651026407 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "calvin_files/writers/src/TextFileWriter.h" // #include "util/Fs.h" // using namespace affymetrix_calvin_io; /* * Initialize the class. */ TextFileWriter::TextFileWriter() { } /* * Clean up. */ TextFileWriter::~TextFileWriter() { } /* * Write the entire file, the header and body. */ void TextFileWriter::WriteFile(const std::string &fileName, std::map &textData) { OpenFile(fileName); WriteFileBody(textData); CloseFile(); } /* * Open the file stream. */ void TextFileWriter::OpenFile(const std::string &fileName) { Fs::aptOpen(fileStream, fileName, std::ios::out); if (!fileStream.is_open() && !fileStream.good()) { affymetrix_calvin_exceptions::FileCreateException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * Closes the file stream. */ void TextFileWriter::CloseFile() { fileStream.close(); if (fileStream.fail()) { affymetrix_calvin_exceptions::FileWriteException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0); throw e; } } /* * The fhe parameters to the file in the format name=value. 
*/ void TextFileWriter::WriteFileBody(std::map &textData) { std::map::iterator it; for (it=textData.begin(); it!=textData.end(); it++) { fileStream << (*it).first << "=" << (*it).second << std::endl; } } affxparser/src/fusion/calvin_files/writers/src/TextFileWriter.h0000644000175200017520000000457714516003651026063 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TextFileWriter_HEADER_ #define _TextFileWriter_HEADER_ /*! \file TextFileWriter.h This file provides interfaces to store information in a text parameter file. */ #include "calvin_files/writers/src/FileWriteException.h" // #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4290) // dont show warnings about throw keyword on function declarations. #endif namespace affymetrix_calvin_io { /*! This class provides interfaces to store text parameter information. */ class TextFileWriter { public: /*! Constructor */ TextFileWriter(); /*! Destructor */ ~TextFileWriter(); protected: /*! The file stream. */ std::ofstream fileStream; /*! Opens the file for writing. * * @param fileName The name of the file to open. */ void OpenFile(const std::string &fileName); /*! 
Writes the body of the file. * * @param textData The parameter name/value pairs. */ void WriteFileBody(std::map &textData); /*! Closes the file */ void CloseFile(); public: /*! Writes the parameters to the text file. * * @param fileName The name of the text file to write. * @param textData The text file parameter name/value pairs. * @exception affymetrix_calvin_exceptions::CFileCreateException The output file cannot be created. * @exception affymetrix_calvin_exceptions::CFileWriteException The file was not written properly. */ void WriteFile(const std::string &fileName, std::map &textData); }; }; #endif // _TextFileWriter_HEADER_ affxparser/src/fusion/file/0000755000175200017520000000000014516022540016745 5ustar00biocbuildbiocbuildaffxparser/src/fusion/file/1LQFileData.cpp0000644000175200017520000001157014516003651021446 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/1LQFileData.h" // #include "file/FileIO.h" // #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif using namespace affx1lq; using namespace std; /*! 
The maximum line length in a 1LQ file */ #define MAXLINELENGTH 1024 /* * Initialize the members. */ C1LQFileData::C1LQFileData() : m_NumberRows(0) , m_NumberColumns(0) { } /* * Free all memory used when reading the 1LQ file. */ C1LQFileData::~C1LQFileData() { Clear(); } /* * Free all memory used when reading the 1LQ file. */ void C1LQFileData::Clear() { m_NumberRows=0; m_NumberColumns=0; m_Entries.clear(); } /* * Check if the file exists. */ bool C1LQFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_FileName.c_str(), &st) == 0); } /* * Open the file and read the header and data sections. If the file * fails to open then return a false. */ bool C1LQFileData::Read() { // Clear memory first. Clear(); // Open the file. ifstream instr; instr.open(m_FileName.c_str(), ios::in | ios::binary); // Check if open if (!instr) { return false; } // Read the header if (Read(instr) == false) { Clear(); return false; } return true; } /* * Read the remaining lines in the file (skipping the column header line). * These are all data lines. * Parse each line and add it to the data list. * If the number of columns is not what is expected then return a false. * Ignore any lines that begin with a #. */ bool C1LQFileData::Read(std::ifstream &instr) { const int maxSize = 128; char line[MAXLINELENGTH]; char qual[maxSize]; char feat[maxSize]; char probe[maxSize]; DataEntry entry; const char *format = "%d %d %s %d %s %s %d %d %d %c %c %c %c %d %d %d"; int max_X = 0; int max_Y = 0; // Since the newer 1lq file does not contain the following lines, // do not use these values as the indicator of NumberOfRows or NumberOfColumns. // NumberOfRows should be the maximum value of Y+ 1 // NumberOfColumns should be the maximum value of X + 1 // Thus, skip the following lines: // (optional) COLS/ROWS=448 448 Dummy // (optional) Dummy Line. 
// (column heading) X Y SEQUENCE DESTYPE FEATURE QUALIFIER ReadNextLine(instr, line, MAXLINELENGTH); if (strncmp("COLS/ROWS", line, 9) == 0) { ReadNextLine(instr, line, MAXLINELENGTH); // skip the Dummy Line. if (strncmp("Dummy", line, 5) == 0) { ReadNextLine(instr, line, MAXLINELENGTH); } } while (1) { line[0] = 0; ReadNextLine(instr, line, MAXLINELENGTH); if (strlen(line) == 0) break; if (line[0] == '#') continue; if (sscanf(line, format, &entry.x, &entry.y, probe, &entry.destype, feat, qual, &entry.expos, &entry.plength, &entry.position, &entry.cbase, &entry.pbase, &entry.tbase, &entry.ipbase, &entry.unit, &entry.block, &entry.atom ) != NUM_1LQ_COLUMNS) { Clear(); return false; } entry.probe = probe; entry.qualifier = qual; entry.feature = feat; m_Entries.push_back(entry); if (entry.x > max_X) max_X = entry.x; if (entry.y > max_Y) max_Y = entry.y; } // NumberOfColumns is max of X + 1 // NumberOfRows is max of Y + 1 m_NumberColumns = max_X + 1; m_NumberRows = max_Y +1; return true; } /*! Rotate the data one time. */ void C1LQFileData::Rotate() { int x; for(list::iterator it=m_Entries.begin(); it!=m_Entries.end(); ++it) { // (x, y) ---rotation--> (x', y') // (0, 0) (0, 331) // (1, 0) (0, 330) // (330, 331) (331, 1) // (331, 331) (331, 0) // m_NumberColumns = 332 // Algorithm for rotation: x = (*it).x; (*it).x = (*it).y; (*it).y = m_NumberColumns - x - 1; } // Number of Columns becomes number of Rows and vice versa x = m_NumberColumns; m_NumberColumns = m_NumberRows; m_NumberRows = x; } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/1LQFileData.h0000644000175200017520000001036614516003651021115 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _1LQ_FILE_DATA_HEADER_ #define _1LQ_FILE_DATA_HEADER_ /*! \file 1LQFileData.h This file provides 1LQ file reading and storage capabilities. */ ////////////////////////////////////////////////////////////////////// #include "portability/affy-base-types.h" // #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affx1lq { /*! The number of columns in the file */ #define NUM_1LQ_COLUMNS 16 /*! Data for a line in the 1LQ file. */ typedef struct _DataEntry { /*! The x coordinate */ int32_t x; /*! The y coordinate */ int32_t y; /*! The probe sequence */ std::string probe; /*! The design type */ int32_t destype; /*! The feature */ std::string feature; /*! The qualifier */ std::string qualifier; /*! The expos */ int32_t expos; /*! The probe length */ int32_t plength; /*! The position */ int32_t position; /*! The cbase */ char cbase; /*! The probe base at the substitution position. */ char pbase; /*! The target base being interrogated by the probe. */ char tbase; /*! The ipbase */ char ipbase; /*! The unit */ int32_t unit; /*! The block */ int32_t block; /*! The atom */ int32_t atom; } DataEntry; /*! The class provides storage and reading capabilities for 1LQ files. 
*/ class C1LQFileData { public: /*! Constructor */ C1LQFileData(); /*! Destructor */ ~C1LQFileData(); protected: /*! The full path to the 1LQ file. */ std::string m_FileName; /*! The number of rows of features on the array. */ int m_NumberRows; /*! The number of columns of features on the array. */ int m_NumberColumns; /*! The data entries read from the file. */ std::list m_Entries; /*! The number of rotations that have been done on the data. */ int m_NumberRotates; /*! Reads the file data * @param instr The input file stream. * @return True if successful. */ bool Read(std::ifstream &instr); public: /*! Sets the full path to the 1LQ file. * @param name The file name. */ void SetFileName (const char *name) { m_FileName = name; } /*! Gets the full path of the file. * @return The full path of the file. */ std::string GetFileName() const { return m_FileName; } /*! Reads the contents of the file. * @return True if successful. */ bool Read(); /*! Checks if the file exists. * @return True if exists. */ bool Exists(); /*! Deallocate memory. */ void Clear(); /*! Gets the number of columns of features on the array. * @return The number of columns of features on the array. */ int GetNumberColumns() const { return m_NumberColumns; } /*! Gets the number of rows of features on the array. * @return The number of rows of features on the array. */ int GetNumberRows() const { return m_NumberRows; } /*! Returns the number of times this data has been rotated. * @return The number of times this data has been rotated. * Under normal conditions it should return 0 for no rotation * or 1 for a single rotation. */ int GetNumberRotates() { return m_NumberRotates; } /*! Rotate the data one time. */ void Rotate(); /*! Gets the data list. * @return The data list. 
*/ const std::list& GetEntries() const { return m_Entries; } }; //////////////////////////////////////////////////////////////////// } // namespace ////////////////////////////////////////////////////////////////////// #endif // !defined(_1LQ_FILE_DATA_HEADER_) affxparser/src/fusion/file/BARFileData.cpp0000644000175200017520000003232314516003651021454 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/BARFileData.h" #include "file/FileIO.h" // #include #include #include #include #include // #ifndef _MSC_VER #include #endif #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif ////////////////////////////////////////////////////////////////////// using namespace affxbar; ////////////////////////////////////////////////////////////////////// CGDACSequenceResultItem::CGDACSequenceResultItem() : m_NumberDataPoints(0), m_NumberColumns(0), m_NumberParameters(0), m_pColumnTypes(NULL), m_ppData(NULL), m_pppData(NULL), m_bMapped(false), m_lpData(NULL), m_DataStartPosition(0) { } ////////////////////////////////////////////////////////////////////// CGDACSequenceResultItem::~CGDACSequenceResultItem() { for (int i=0; iGetColumnType(iCol) == BAR_DATA_INTEGER) { 
data.iValue = MmGetUInt32_N((uint32_t*)(m_lpData + offset)); } else { data.fValue = MmGetFloat_N((float*)(m_lpData + offset)); } } } ////////////////////////////////////////////////////////////////////// void CGDACSequenceResultItem::MakeShallowCopy(CGDACSequenceResultItem &orig) { m_Name = orig.m_Name; m_Version = orig.m_Version; m_GroupName = orig.m_GroupName; m_NumberDataPoints = orig.m_NumberDataPoints; m_NumberColumns = orig.m_NumberColumns; m_NumberParameters = orig.m_NumberParameters; m_pColumnTypes = orig.m_pColumnTypes; m_Parameters = orig.m_Parameters; m_pppData = &orig.m_ppData; m_ppData = NULL; m_bMapped = orig.m_bMapped; m_lpData = orig.m_lpData; m_DataStartPosition = orig.m_DataStartPosition; } ////////////////////////////////////////////////////////////////////// void CGDACSequenceResultItem::SetNumberDataPoints(int n) { m_NumberDataPoints = n; m_ppData = new BarSequenceResultData *[m_NumberDataPoints]; m_pppData = &m_ppData; for (int j=0; j 0) { return (m_GroupName + ":" + m_Version + ";" + m_Name); } else return m_Name; } ////////////////////////////////////////////////////////////////////// CBARFileData::CBARFileData() : m_Version(0), m_NumberSequences(0), m_NumberColumns(0), m_NumberParameters(0), m_lpFileMap(NULL), m_lpData(NULL), m_bFileOpen(false), m_bFileMapped(false) { #ifdef _MSC_VER m_hFileMap = INVALID_HANDLE_VALUE; m_hFile = INVALID_HANDLE_VALUE; #else m_MapLen = 0; m_fp = NULL; #endif } ////////////////////////////////////////////////////////////////////// CBARFileData::~CBARFileData() { Close(); } ////////////////////////////////////////////////////////////////////// void CBARFileData::GetResults(int index, CGDACSequenceResultItem &seq) { seq.MakeShallowCopy(m_Results[index]); } ////////////////////////////////////////////////////////////////////// bool CBARFileData::Exists() { // Find the file stats. 
struct stat st; return (stat(m_FileName.c_str(), &st) == 0); } ////////////////////////////////////////////////////////////////////// bool CBARFileData::ReadHeader() { // Read the header, clear memory if failed. if (ReadFile(true) == false) { Close(); return false; } return true; } ////////////////////////////////////////////////////////////////////// bool CBARFileData::Read() { // Open the file if (ReadFile() == false) { Close(); return false; } return true; } ////////////////////////////////////////////////////////////////////// bool CBARFileData::ReadFile(bool bReadHeaderOnly) { //bool retVal = false; // First close the file. Close(); // Read the header if (ReadHeaderSection() == false) { Close(); return false; } // Stop if just reading if (bReadHeaderOnly) { return true; } // Read the data. return ReadDataSection(); } ////////////////////////////////////////////////////////////////////// bool CBARFileData::ReadHeaderSection() { // Open the file. std::ifstream instr; instr.open(m_FileName.c_str(), std::ios::in | std::ios::binary); // Check if open if (!instr) { m_strError = "Unable to open the file."; return false; } // Magic number std::string magic; ReadFixedString(instr, magic, 8); // Version ReadFloat_N(instr, m_Version); // Number of sequendes int32_t cType; ReadInt32_N(instr, cType); m_NumberSequences=cType; // Columns int i = 0; ReadInt32_N(instr, cType); m_NumberColumns=cType; m_ColumnTypes.resize(m_NumberColumns); for (i=0; i #endif ////////////////////////////////////////////////////////////////////// #include "file/TagValuePair.h" // #include #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxbar { ////////////////////////////////////////////////////////////////////// /*! The possible data types stored in a BAR file. */ typedef enum _GDACFILES_BAR_DATA_TYPE { /*! Double precision (64 bit) floating point. */ BAR_DATA_DOUBLE, /*! Single precision (32 bit) floating point. 
*/ BAR_DATA_FLOAT, /*! 32 bit signed integer. */ BAR_DATA_INTEGER, /*! 16 bit signed integer. */ BAR_DATA_SHORT, /*! 8 bit signed integer. */ BAR_DATA_CHAR, /*! 32 bit unsigned integer. */ BAR_DATA_UINTEGER, /*! 16 bit unsigned integer. */ BAR_DATA_USHORT, /*! 8 bit unsigned integer. */ BAR_DATA_UCHAR } GDACFILES_BAR_DATA_TYPE; /*! An STL vector of data types. */ typedef std::vector GDACFILES_BAR_DATA_TYPE_VECTOR; ////////////////////////////////////////////////////////////////////// /*! This union is used to store data associated with a single value. */ typedef union _BarSequenceResultData { /*! Double precision (64 bit) floating point. */ double dValue; /*! Single precision (32 bit) floating point. */ float fValue; /*! 32 bit signed integer. */ int iValue; /*! 16 bit signed integer. */ short sValue; /*! 8 bit signed integer. */ char cValue; /*! 32 bit unsigned integer. */ unsigned int uiValue; /*! 16 bit unsigned integer. */ unsigned short usValue; /*! 8 bit unsigned integer. */ unsigned char ucValue; } BarSequenceResultData; ////////////////////////////////////////////////////////////////////// /*! This class provides storage for results associated with a sequence. */ class CGDACSequenceResultItem { public: /*! Constructor */ CGDACSequenceResultItem(); /*! Destructor */ ~CGDACSequenceResultItem(); protected: /*! The name of the sequence. */ std::string m_Name; /*! The version associated to the sequence. */ std::string m_Version; /*! The group name for the sequence. */ std::string m_GroupName; /*! The number of data points associated with the sequence. */ int m_NumberDataPoints; /*! The number of columns of data for each data point. */ int m_NumberColumns; /*! The number of name/value parameters. */ int m_NumberParameters; /*! A pointer to the BAR file's column types vector. */ GDACFILES_BAR_DATA_TYPE_VECTOR *m_pColumnTypes; /*! The parameter name/value array. */ TagValuePairTypeVector m_Parameters; /*! A pointer to the data. 
*/ BarSequenceResultData **m_ppData; /*! A pointer to the data pointer. * This is used when making copies of the object. The data pointer is not duplicated, * just a pointer to the data object is made. */ BarSequenceResultData ***m_pppData; /*! Flag indicating if the file is memory mapped. */ bool m_bMapped; /*! Pointer to the memory mapped file. */ char *m_lpData; /*! The file position of the start of the data. */ int m_DataStartPosition; /*! Makes a copy of the object. The results are not copied to the new object, just * the pointer back to the originating object. * @param orig The object to copy. */ void MakeShallowCopy(CGDACSequenceResultItem &orig); public: /*! Gets the sequence name. * @return The sequence name. */ std::string GetName() const { return m_Name; } /*! Gets the version number. * @return The version number. */ std::string GetVersion() const { return m_Version; } /*! Gets the group name. * @return The group name. */ std::string GetGroupName() const { return m_GroupName; } /*! Returns the full name (name, group, version). * @return The full name. */ std::string GetFullName() const; /*! Gets the number of data points (rows). * @return The number of data points (rows). */ int GetNumberDataPoints() const { return m_NumberDataPoints; } /*! Gets the number of columns. * @return The number of columns. */ int GetNumberColumns() const { return m_NumberColumns; } /*! Gets the type of data for a column. * @param index The column index. * @return The type of data at the given column. */ GDACFILES_BAR_DATA_TYPE GetColumnType(int index) const { return (*m_pColumnTypes)[index]; } /*! Gets the data for a given position. * @param iData The row index. * @param iCol The column index. * @param data The returned data. */ void GetData(int iData, int iCol, BarSequenceResultData &data ); /*! Gets the number of parameters. * @return The number of parameters. */ int GetNumberParameters() const { return m_NumberParameters; } /*! Gets the parameter at the given index. 
* @param index The index to the parameter array. * @return The parameter. */ TagValuePairType &GetParameter(int index) { return m_Parameters[index]; } /*! Sets the sequence name. * @param name The sequence name. */ void SetName(const char *name) { m_Name = name; } /*! Sets the version. * @param ver The version. */ void SetVersion(const char *ver) { m_Version = ver; } /*! Sets the group name. * @param group The group name. */ void SetGroupName(const char *group) { m_GroupName = group; } /*! Sets the number of data points and allocate memory for the results. * @param n The number of data points (rows). */ void SetNumberDataPoints(int n); /*! Sets the data point (when writing a file). * @param nIndex The row index. * @param colIndex The column index. * @param data The value of the data point. */ void SetDataPoint(int nIndex, int colIndex, BarSequenceResultData &data); /*! Adds a parameter to the list. * @param tag The name of the parameter. * @param value The value of the parameter.* */ void AddParameter(std::string tag, std::string value); /*! Friend to the top level class. */ friend class CBARFileData; }; //////////////////////////////////////////////////////////////////// /*! The class provides storage and reading capabilities for BAR files. */ class CBARFileData { public: /*! Constructor */ CBARFileData(); /*! Destructor */ ~CBARFileData(); protected: /*! The full path to the BAR file. */ std::string m_FileName; /*! The version number of the file. */ float m_Version; /*! The number of sequences in the file. */ int m_NumberSequences; /*! The number of columns of data in each sequence entry. */ int m_NumberColumns; /*! The number of name/value paramters. */ int m_NumberParameters; /*! The start position of the data. */ int m_DataStartPosition; /*! The name/value parameters. */ TagValuePairTypeVector m_Parameters; /*! The types of data stored in each column. */ GDACFILES_BAR_DATA_TYPE_VECTOR m_ColumnTypes; /*! The sequence results. */ std::vector m_Results; /*! 
A string to hold error messages. */ std::string m_strError; /*! Reads the file. * @param bReadHeaderOnly Flag to indicate if the header should only be read. * @return True if successful. */ bool ReadFile(bool bReadHeaderOnly=false); /*! Reads the file header * @return True if successful. */ bool ReadHeaderSection(); /*! Reads the file data (after the header). * @return True if successful. */ bool ReadDataSection(); /*! Gets the size of a row of data. * @return The number of bytes per data row. */ int GetDataRowSize(); /*! Pointer to a memory mapped file. */ void *m_lpFileMap; /*! Pointer to the data in the memory mapped file. */ char *m_lpData; #ifdef _MSC_VER /*! Windows handle to the file. */ HANDLE m_hFileMap; /*! Windows handle to the file. */ HANDLE m_hFile; #else /*! File pointer for memory mapping. */ FILE *m_fp; /*! The size of the file. */ int m_MapLen; #endif /*! Flag indicating if the file is open. */ bool m_bFileOpen; /*! Flag indicating if the file is memory mapped. */ bool m_bFileMapped; public: /*! Sets the full path to the BAR file. * @param name The file name. */ void SetFileName (const char *name) { m_FileName = name; } /*! Gets the full path of the file. * @return The full path of the file. */ std::string GetFileName() const { return m_FileName; } /*! Reads the contents of the file. * @return True if successful. */ bool Read(); /*! Reads the header of the file. * @return True if successful. */ bool ReadHeader(); /*! Checks if the file exists. * @return True if exists. */ bool Exists(); /*! Closes and deallocate memory. */ void Close(); /*! Gets an error string for file read errors. * @return An error string for file read errors. */ std::string GetError() const { return m_strError; } /*! Gets the file version. * @return The file version. */ float GetVersion() const { return m_Version; } /*! Gets the number of sequence stored in the file. * @return The number of sequence stored in the file. 
*/ int GetNumberSequences() const { return m_NumberSequences; } /*! Gets the number of columns of data per row. * @return The number of columns of data per row. */ int GetNumberColumns() const { return m_NumberColumns; } /*! Gets the number of name/value parameters. * @return The number of name/value parameters. */ int GetNumberParameters() const { return m_NumberParameters; } /*! Gets the name/value parameter. * @param index The index to the parameter of interest. * @return The name/value parameter given the index. */ TagValuePairType &GetParameter(int index) { return m_Parameters[index]; } /*! Gets the type of data stored. * @param index The column index. * @return The type of data stored in the column. */ GDACFILES_BAR_DATA_TYPE &GetColumnTypes(int index) { return m_ColumnTypes[index]; } /*! Gets the sequence data. * @param index The sequence index. * @param seq The sequence data. */ void GetResults(int index, CGDACSequenceResultItem &seq); /*! Adds a parameter to the list. * @param tag The parameter name. * @param value The parameter value. */ void AddAlgorithmParameter(const char *tag, const char *value); /*! Adds a column for writing a BAR file. * @param ctype The type of data to store at the column. */ void AddColumn(GDACFILES_BAR_DATA_TYPE ctype); /*! Sets the number of sequences to store in the BAR file. * @param n The number of sequences. */ void SetNumberSequences(int n); /*! Retrieves a pointer to the sequences results. This should only be used when * performing write operations. Use the GetResults functions when reading the file. * @param index The sequence index. * @return A pointer to the sequence results. 
*/ CGDACSequenceResultItem *GetResultsPtr(int index) { return &m_Results[index]; } }; //////////////////////////////////////////////////////////////////// } // namespace ////////////////////////////////////////////////////////////////////// #endif // !defined(AFX_BARFILEDATA_H__BDEC0B48_58F6_480C_A988_05355DCF0BA4__INCLUDED_) affxparser/src/fusion/file/BARFileWriter.cpp0000644000175200017520000000756114516003651022065 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/BARFileWriter.h" // #include "file/FileWriter.h" // #include #include #include #include // #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif #ifdef _MSC_VER #define snprintf _snprintf #pragma warning(disable: 4996) // don't show deprecated warnings. 
#endif ////////////////////////////////////////////////////////////////////// #define BAR_VERSION 2.0f ////////////////////////////////////////////////////////////////////// using namespace affxbar; using namespace affxbarwriter; ////////////////////////////////////////////////////////////////////// CBARFileWriter::CBARFileWriter() : CBARFileData() { } ////////////////////////////////////////////////////////////////////// CBARFileWriter::~CBARFileWriter() { Close(); } ////////////////////////////////////////////////////////////////////// bool CBARFileWriter::CreateNewFile() { m_Version = 1.0f; // Open the file. m_strError = ""; m_NewBarFile.open(m_FileName.c_str(), std::ios::out | std::ios::binary); if (!m_NewBarFile) { m_strError = "Unable to open the file."; return false; } return true; } ////////////////////////////////////////////////////////////////////// bool CBARFileWriter::Save() { // Magic number char magic[9]; snprintf(magic,sizeof(magic), "barr%c%c%c%c", '\r', '\n', '\032', '\n'); WriteFixedString(m_NewBarFile, std::string(magic), 8); // Version // WriteFloat_I(m_NewBarFile, BAR_VERSION); WriteFloat_N(m_NewBarFile, BAR_VERSION); // Number of sequendes WriteInt32_N(m_NewBarFile, m_NumberSequences); // Columns int i = 0; WriteInt32_N(m_NewBarFile, m_NumberColumns); for (i=0; i #include #include #include // #ifdef _MSC_VER #define snprintf _snprintf #pragma warning(disable: 4996) #endif using namespace std; using namespace affxbed; /*! The tab character. */ #define TAB "\t" /* * Clears any used memory. */ void IntervalGroup::Clear() { parameters.clear(); intervals.clear(); } /* * Initialize the class. */ BEDFileData::BEDFileData() { } /* * Clears any used memory. */ BEDFileData::~BEDFileData() { Clear(); } /* * Clears any used memory. */ void BEDFileData::Clear() { browser=""; track=""; intervalGroups.clear(); } /* * Format the browser string. 
*/ void BEDFileData::FormatBrowser(const char *seq, int start, int stop) { char line[256]; snprintf(line,sizeof(line), "%s %s:%d-%d", BROWSER_LINE_START, seq, start, stop); browser=line; } /* * Format the track string. */ void BEDFileData::FormatTrack(const char *name, const char *desc) { char line[256]; if (desc && strlen(desc) > 0) snprintf(line,sizeof(line), "%s\"%s\" description=\"%s\"", TRACK_LINE_START, name, desc); else snprintf(line,sizeof(line), "%s\"%s\"", TRACK_LINE_START, name); track=line; } /* * Read the contents of the BED file into the BED object. */ bool BEDFileData::Read() { Clear(); // Open the file and read the data. ifstream instr(fileName.c_str(), ios::in); if (!instr) return false; // Read the remaining lines. int n; const int MAXLINELEN = 1024; char line[MAXLINELEN+1]; char seq[MAXLINELEN/4+1]; char name[MAXLINELEN/4+1]; IntervalEntry interval; IntervalGroup group; IntervalGroup *pGroup=NULL; TagValuePairType param; bool firstParameter=true; while (instr.getline(line, MAXLINELEN)) { // The browser line. if (strlen(line) > strlen(BROWSER_LINE_START) && strncmp(line, BROWSER_LINE_START, strlen(BROWSER_LINE_START)) == 0) Browser() = line; // The track line else if (strlen(line) > strlen(TRACK_LINE_START) && strncmp(line, TRACK_LINE_START, strlen(TRACK_LINE_START)) == 0) Track() = line; // Comment lines. These contain parameters. else if (line[0] == '#') { // If this is the first detected parameter then create a new group object. if (firstParameter == true) { firstParameter = false; group.Clear(); intervalGroups.push_back(group); pGroup = &(*intervalGroups.rbegin()); } char *token = strtok(line+1, TAB); param.Tag = token; token = strtok(NULL, TAB); if (token) { param.Value = token; pGroup->parameters.push_back(param); } } // This line contains an interval. else { // No longer looking at parameters. Reset the flag. firstParameter = true; // Create a new group. This is done here if the BED file has no parameters. 
if (intervalGroups.size() == 0) { group.Clear(); intervalGroups.push_back(group); } // Get the last group and add the interval to it. pGroup = &(*intervalGroups.rbegin()); n = sscanf(line, "%s %d %d %s %f %c", seq, &interval.start, &interval.stop, name, &interval.overlap, &interval.strand); if (n == 3) { strcpy(name, ""); interval.overlap = 0; interval.strand = ' '; } interval.seq = seq; interval.probeSetName = name; pGroup->intervals.push_back(interval); } } instr.close(); return true; } affxparser/src/fusion/file/BEDFileData.h0000644000175200017520000000637014516003651021112 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file BEDFileData.h This file defines a class to read BED files. */ #ifndef _BEDFileData_HEADER_ #define _BEDFileData_HEADER_ #include "file/IntervalEntry.h" #include "file/TagValuePair.h" // #include #include #include // namespace affxbed { /*! The first word of the browser line. */ #define BROWSER_LINE_START "browser position" /*! The first word of the track line. */ #define TRACK_LINE_START "track name=" /*! The default track name. */ #define DEFAULT_TRACK_NAME "\"AFFX track\"" /*! 
This type holds the parameters and intervals for a given group of intervals. */ typedef struct _IntervalGroup { /*! The parameters for the interval group. */ TagValuePairTypeList parameters; /*! The intervals. */ IntervalEntryList intervals; /*! Clears the structure */ void Clear(); } IntervalGroup; /*! An STL list of sequence data items. */ typedef std::list IntervalGroupList; /*! An STL list iterator (non-constant). */ typedef IntervalGroupList::iterator IntervalGroupListIt; /*! Stores data associated with a BED file. */ class BEDFileData { protected: /*! The file name. */ std::string fileName; /*! The browser (first line of the BED file). */ std::string browser; /*! The track for the bed file (second line of the BED file). */ std::string track; /*! The intervals. */ IntervalGroupList intervalGroups; public: /*! Constructor */ BEDFileData(); /*! Destructor */ ~BEDFileData(); /*! Clears the data in the class. */ void Clear(); /*! The file name. * @return The file name. */ std::string &FileName() { return fileName; } /*! The browser (first line of the BED file). * @return The browser line */ std::string &Browser() { return browser; } /*! The track for the bed file (second line of the BED file). * @return The track line. */ std::string &Track() { return track; } /*! Format the browser string. * @param seq The sequence name. * @param start The start position. * @param stop The stop position. */ void FormatBrowser(const char *seq, int start, int stop); /*! Format the track string. * @param name The track name. * @param desc The description. */ void FormatTrack(const char *name="AFFX track", const char *desc=""); /*! The intervals. * @return The intervals */ IntervalGroupList &IntervalGroups() { return intervalGroups; } /*! Reads the entries from the file. * @return True if successful. 
*/ bool Read(); }; } #endif affxparser/src/fusion/file/BEDFileWriter.cpp0000644000175200017520000000577714516003651022062 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/BEDFileWriter.h" // #include // using namespace std; using namespace affxbed; #ifdef _MSC_VER #pragma warning(disable: 4996) #endif /*! The tab character. */ #define TAB "\t" /* * Initialize the class. */ BEDFileWriter::BEDFileWriter() { } /* * Clears any used memory. */ BEDFileWriter::~BEDFileWriter() { Clear(); } /* * Save the intervals to a BED file. */ bool BEDFileWriter::Write() { // Return if no intervals if (intervalGroups.size() == 0) return true; // Create the output file. ofstream outstr(fileName.c_str(), ios::out); if (!outstr) return false; // Determine the start and stop position in the first sequence. 
int start=0; int stop=0; string seq; bool bFirst=true; IntervalEntryListConstIt iter; IntervalGroupListIt groupIt; for (groupIt=intervalGroups.begin(); groupIt!=intervalGroups.end(); ++groupIt) { IntervalGroup &group = *groupIt; for (iter=group.intervals.begin(); iter!=group.intervals.end(); ++iter) { const IntervalEntry &interval = *iter; if (bFirst) { bFirst = false; seq = interval.seq; start = interval.start; } if (interval.seq != seq) break; stop = interval.stop; } } FormatBrowser(seq.c_str(), start, stop); if (track.length() == 0) { track = TRACK_LINE_START; track += DEFAULT_TRACK_NAME; } // Write the header line outstr << browser << endl; outstr << track << endl; // Save the intervals for (groupIt=intervalGroups.begin(); groupIt!=intervalGroups.end(); ++groupIt) { IntervalGroup &group = *groupIt; TagValuePairTypeList::iterator paramIt; for (paramIt=group.parameters.begin(); paramIt!=group.parameters.end(); ++paramIt) { outstr << "#" << (*paramIt).Tag << TAB << (*paramIt).Value << endl; } for (iter=group.intervals.begin(); iter!=group.intervals.end(); ++iter) { const IntervalEntry &interval = *iter; outstr << interval.seq << "\t" << interval.start << "\t" << interval.stop; if (interval.probeSetName.length() > 0) { outstr << "\t" << interval.probeSetName << "\t" << interval.overlap << "\t" << interval.strand; } outstr << endl; } } // Check the status if (outstr.fail() != 0) return false; return true; } affxparser/src/fusion/file/BEDFileWriter.h0000644000175200017520000000246114516003651021512 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file BEDFileWriter.h This file defines a class to write BED files. */ #ifndef _BEDFileWriter_HEADER_ #define _BEDFileWriter_HEADER_ #include "file/BEDFileData.h" // namespace affxbed { /*! Provies write capabilities for BED files. */ class BEDFileWriter : public BEDFileData { public: /*! Constructor */ BEDFileWriter(); /*! Destructor */ ~BEDFileWriter(); /*! Writes the entries to a file. * @return True if successful. */ bool Write(); }; } #endif affxparser/src/fusion/file/BPMAPFileData.cpp0000644000175200017520000004035114516003651021707 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/BPMAPFileData.h" #include "file/FileIO.h" // #include #include #include #include #include #include #include // #ifndef _MSC_VER #include #endif #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif ////////////////////////////////////////////////////////////////////// using namespace affxbpmap; ////////////////////////////////////////////////////////////////////// // The older bpmap files had a corruption in the way the floating point // values were written. This flag is used to toggle the way the values are // read. The older files should use ReadFloatFromOldBPMAP. static bool g_OlderBPMAPFileFormat=true; /////////////////////////////////////////////////////////////////////////////// bool GDACSequenceHitItemType::operator<(const GDACSequenceHitItemType &rhs) const { // numeric sort on position, then strand, then PM X, then PM Y, then MM X, then MM Y if( this->Position == rhs.Position && this->TopStrand == rhs.TopStrand && this->PMX == rhs.PMX && this->PMY == rhs.PMY && this->MMX == rhs.MMX && this->MMY < rhs.MMY) return true; if( this->Position == rhs.Position && this->TopStrand == rhs.TopStrand && this->PMX == rhs.PMX && this->PMY == rhs.PMY && this->MMX < rhs.MMX) return true; if( this->Position == rhs.Position && this->TopStrand == rhs.TopStrand && this->PMX == rhs.PMX && this->PMY < rhs.PMY) return true; if( this->Position == rhs.Position && this->TopStrand == rhs.TopStrand && this->PMX < rhs.PMX) return true; if( this->Position == rhs.Position && this->TopStrand< rhs.TopStrand) return true; if( this->Position < rhs.Position ) return true; return false; } ////////////////////////////////////////////////////////////////////// CBPMAPFileData::CBPMAPFileData() : 
m_NumberSequences(0), m_lpFileMap(NULL), m_lpData(NULL), m_bFileOpen(false), m_bFileMapped(false) { #ifdef _MSC_VER m_hFileMap = INVALID_HANDLE_VALUE; m_hFile = INVALID_HANDLE_VALUE; #else m_MapLen = 0; m_fp = NULL; #endif } ////////////////////////////////////////////////////////////////////// CBPMAPFileData::~CBPMAPFileData() { Close(); } ////////////////////////////////////////////////////////////////////// void CBPMAPFileData::Close() { m_DataStartPosition = 0; m_NumberSequences = 0; m_SequenceItems.erase(m_SequenceItems.begin(), m_SequenceItems.end()); #ifdef _MSC_VER if (m_bFileOpen) { if (m_bFileMapped) { UnmapViewOfFile(m_lpFileMap); m_lpFileMap = NULL; CloseHandle(m_hFileMap); m_hFileMap = NULL; m_lpData = NULL; } CloseHandle(m_hFile); m_hFile = INVALID_HANDLE_VALUE; m_bFileOpen = false; } #else if (m_fp == NULL) { if (m_bFileMapped == true) { munmap(m_lpFileMap, m_MapLen); m_MapLen = 0; } if (m_fp) fclose(m_fp); m_fp = NULL; } #endif } ////////////////////////////////////////////////////////////////////// bool CBPMAPFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_FileName.c_str(), &st) == 0); } ////////////////////////////////////////////////////////////////////// bool CBPMAPFileData::ReadHeader() { // Read the header, Close memory if failed. 
if (ReadFile(true) == false) { Close(); return false; } return true; } ////////////////////////////////////////////////////////////////////// bool CBPMAPFileData::Read() { // Open the file if (ReadFile() == false) { Close(); return false; } return true; } ////////////////////////////////////////////////////////////////////// #define DNA_BASE_A 0 #define DNA_BASE_C 1 #define DNA_BASE_G 2 #define DNA_BASE_T 3 #define DNA_BASE_OTHER 4 static int int_to_acgt(unsigned int i, char *c) { switch(i) { case DNA_BASE_A: *c = 'A'; break; case DNA_BASE_C: *c = 'C'; break; case DNA_BASE_G: *c = 'G'; break; case DNA_BASE_T: *c = 'T'; break; default: *c = 'N'; break; } if(*c == 'N') return(EXIT_FAILURE); else return(EXIT_SUCCESS); } ////////////////////////////////////////////////////////////////////// static int byte_to_dna(const char *b, unsigned short n_b, char *d, unsigned short n_d) /* ** b is the array of bytes to be converted ** n_b is the length of the byte array ** d is the char array where the converted dna will be written ** n_d is the number of bases which should be extracted. */ { unsigned short n_bytes,i,j,k; /* Do we have enough bytes for the requested dna? */ if(n_d > 4*n_b) { //fprintf(stderr,"byte_to_dna: request for %d bases from %d bytes, not possible.\n",n_d,n_b); return(EXIT_FAILURE); } n_bytes = (unsigned short) ::ceil(n_d/(float)4); /* number of bytes we need to process */ for(i=0,j=0; i> (2*(3-k))) & 03,d+j)==EXIT_FAILURE) return EXIT_FAILURE; //err_bcel("byte_to_dna: problem translating int: %d, b[%u]=%u\n",(b[i] >> (2*(3-j))) & 03,i,b[i]); } } d[j] = '\0'; return(EXIT_SUCCESS); } ////////////////////////////////////////////////////////////////////// bool CBPMAPFileData::ReadFile(bool bReadHeaderOnly) { // First Close the memory. Close(); // Read the header if (ReadHeaderSection() == false) { Close(); return false; } // Stop if just reading if (bReadHeaderOnly) { return true; } // Read the data. 
return ReadDataSection(); } ////////////////////////////////////////////////////////////////////// bool CBPMAPFileData::ReadHeaderSection() { // Open the file. std::ifstream instr; instr.open(m_FileName.c_str(), std::ios::in | std::ios::binary); // Check if open if (!instr) { m_strError = "Unable to open the file."; return false; } // Read the magic number std::string magic; const int magic_size = BPMAP_FILE_HEADER_LEN; ReadFixedString(instr, magic, magic_size); // check the header is correct if(magic != BPMAP_FILE_HEADER_BYTES){ m_strError = "File header is not of BPMAP type. "; return false; } // Get the version number m_Version = 0.0f; ReadFloatFromOldBPMAP_N(instr, m_Version); g_OlderBPMAPFileFormat = true; // Some of the BPMAP files had the version number written incorrectly. // If invalid version number from the first read attempt then try again. if (m_Version > 3.5f || m_Version < 0.5f) { g_OlderBPMAPFileFormat = false; instr.seekg(magic_size, std::ios_base::beg); ReadFloat_N(instr, m_Version); } // Get the number of sequences uint32_t uval; ReadUInt32_N(instr, uval); m_NumberSequences = uval; // Allocate memory for the sequence data m_SequenceItems.resize(m_NumberSequences); // Determine the position of the start of the data m_DataStartPosition = instr.tellg(); // Close the file instr.close(); return true; } ////////////////////////////////////////////////////////////////////// bool CBPMAPFileData::ReadDataSection() { // Open the file. std::ifstream instr; instr.open(m_FileName.c_str(), std::ios::in | std::ios::binary); // Check if open if (!instr) { m_strError = "Unable to open the file."; return false; } // Skip to the data section instr.seekg(m_DataStartPosition); // Read the sequence data. 
int iSeq; uint32_t uval; for (iSeq=0; iSeq= 3.0f) { ReadUInt32_N(instr, uval); m_SequenceItems[iSeq].m_ProbePairs = uval; ReadUInt32_N(instr, uval); m_SequenceItems[iSeq].m_HitStartPosition = uval; } ReadUInt32_N(instr, uval); m_SequenceItems[iSeq].m_NumberHits = uval; if ((int) (m_Version+0.1) >= 2) { ReadUIntLenString_N(instr, m_SequenceItems[iSeq].m_GroupName); ReadUIntLenString_N(instr, m_SequenceItems[iSeq].m_SeqVersion); ReadUInt32_N(instr, uval); m_SequenceItems[iSeq].m_Parameters.resize(uval); for (int iParam=0; iParam<(int)uval; iParam++) { ReadUIntLenString_N(instr, m_SequenceItems[iSeq].m_Parameters[iParam].Tag); ReadUIntLenString_N(instr, m_SequenceItems[iSeq].m_Parameters[iParam].Value); } } } // WAS always declared regardless of _USE_MEM_MAPPING_, leading to // a compile warning on unused variables 'probeIn' and 'probeOut'. #ifndef _USE_MEM_MAPPING_ // Read more sequence data const int probeBufSize=64; unsigned char probeIn[PROBE_STORAGE_BUFFER_LENGTH]=""; char probeOut[probeBufSize]=""; #endif for (iSeq=0; iSeq 3.0f){ instr.seekg(m_SequenceItems[iSeq].m_HitStartPosition); ReadUInt32_N(instr, uval); m_SequenceItems[iSeq].m_Number = uval; } else { ReadUInt32_N(instr, uval); m_SequenceItems[iSeq].m_Number = uval; m_SequenceItems[iSeq].m_HitStartPosition = instr.tellg(); } #ifndef _USE_MEM_MAPPING_ m_SequenceItems[iSeq].m_Hits.resize(m_SequenceItems[iSeq].m_NumberHits); m_SequenceItems[iSeq].m_pHits = &m_SequenceItems[iSeq].m_Hits; // Read the hit data (probe pair data). for (int iHit=0; iHit 0) { return (m_GroupName + ":" + m_SeqVersion + ";" + m_Name); } else return m_Name; } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/BPMAPFileData.h0000644000175200017520000002337714516003651021365 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #if !defined(AFX_BPMAPFILEDATA_H__601D7E2E_5897_4CAA_ABF0_D6002698CB78__INCLUDED_) #define AFX_BPMAPFILEDATA_H__601D7E2E_5897_4CAA_ABF0_D6002698CB78__INCLUDED_ /*! \file BPMAPFileData.h This file provides BPMAP file reading capabilities. */ ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable: 4786) // identifier was truncated in the debug information #include #endif ////////////////////////////////////////////////////////////////////// #include "file/TagValuePair.h" // #include #include #include #include // ////////////////////////////////////////////////////////////////////// #define BPMAP_FILE_HEADER_BYTES "PHT7\r\n\032\n" #define BPMAP_FILE_HEADER_LEN 8 #define PM_ONLY 1 #define PM_MM 0 ////////////////////////////////////////////////////////////////////// namespace affxbpmap { //////////////////////////////////////////////////////////////////// /*! The number of bytes used to store the probe sequence in the BPMAP file. */ #define PROBE_STORAGE_BUFFER_LENGTH 7 /*! This class provides storage for the array information for a hit (probe or probe pair interrogating a genomic position */ typedef struct _GDACSequenceHitItemType { /*! The X coordinate of the PM probe. 
*/ unsigned int PMX; /*! The Y coordinate of the PM probe. */ unsigned int PMY; /*! The X coordinate of the MM probe. */ unsigned int MMX; /*! The Y coordinate of the MM probe. */ unsigned int MMY; /*! The match score for the PM probe. */ float MatchScore; /*! The genomic position the probe is interrogating. */ unsigned int Position; /*! The PM probe sequence. */ std::string PMProbe; /*! The length of the PM probe. */ unsigned char ProbeLength; /*! A flag indicating if the probe is designed to interrogate the top strand. * A value of 1 indicates top, 0 indicates bottom */ unsigned char TopStrand; /*! Comparison operator used by sort */ bool operator<(const _GDACSequenceHitItemType &rhs) const; /*! Packed PM probe sequence */ char PackedPMProbe[PROBE_STORAGE_BUFFER_LENGTH+1]; /*! Returns the genomic position relative to the center of the probe. * @return The genomic position. */ unsigned int getCenterPosition() { return Position + (ProbeLength-1)/2; } /*! Returns the genomic position relative to the start of the probe. * @return The genomic position. */ unsigned int getStartPosition() { return Position; } } GDACSequenceHitItemType; /*! The size of the hit item in the BPMAP file when both PM and MM are present. */ #define HIT_ITEM_SIZE_WITH_PROBE_PAIRS (4+4+4+4+1+PROBE_STORAGE_BUFFER_LENGTH+4+4+1) /*! The size of the hit item in the BPMAP file when only PM are present. */ #define HIT_ITEM_SIZE_WITH_PM_ONLY (4+4+1+PROBE_STORAGE_BUFFER_LENGTH+4+4+1) ////////////////////////////////////////////////////////////////////// /*! This class provides storage for the probes interrogating a sequence. */ class CGDACSequenceItem { public: /*! Constructor */ CGDACSequenceItem(); /*! Destructor */ ~CGDACSequenceItem(); protected: /*! The name of the sequence, e.g., chromosome. */ std::string m_Name; /*! The group name for the sequence, e.g., species or organism. */ std::string m_GroupName; /*! The version associated to the sequence, e.g., build version of the genomic database. 
*/ std::string m_SeqVersion; /*! The sequence id. */ int m_Number; /*! Flag indicating if PM/MM pairs. */ int m_ProbePairs; /* The number of probes or probe pairs (hits). */ int m_NumberHits; /*! An array of hits. */ std::vector m_Hits; /*! A pointer to the array of hits. This is used in memory mapping files. */ std::vector *m_pHits; /*! The parameter name/value array. */ TagValuePairTypeVector m_Parameters; /*! Flag indicating if the file is memory mapped. */ bool m_bMapped; /*! Pointer to the memory mapped file. */ char *m_lpData; /*! The file position of the start of the data. */ int m_HitStartPosition; /*! Makes a copy of the object. The results are not copied to the new object, just * the pointer back to the originating object. * @param orig The object to copy. */ void MakeShallowCopy(CGDACSequenceItem &orig); public: /*! Gets the sequence name. * @return The sequence name. */ std::string GetName() const { return m_Name; } /*! Gets the group name. * @return The group name. */ std::string GroupName() const { return m_GroupName; } /*! Returns the full name (name, group, version). * @return The full name. */ std::string FullName() const; /*! Gets the version number. * @return The version number. */ std::string GetSeqVersion() const { return m_SeqVersion; } /*! Gets the probe mapping type in the sequence * @return 0 if PM-MM probe pairs, 1 if PM-Only probes. */ int GetProbeMapping () const { return m_ProbePairs; } /*! Gets the sequence number. * @return The sequence number. */ int GetNumber() const { return m_Number; } /*! Gets the number of hits (probes or probe pairs). * @return The number of hits (probes or probe pairs). */ int GetNumberHits() const { return m_NumberHits; } /*! Gets the number of tag/value parameters. * @return The number of tag/value parameters. */ int GetNumberParameters() const { return (int)m_Parameters.size(); } /*! Gets the parameter at the given index. * @param index The index to the parameter array. * @return The parameter. 
*/ TagValuePairType GetParameter(int index); /*! Gets the hit data. * @param index The index to the hit of interest. * @param hit The hit (probe/probe pair) information. * @param readProbe A flag indicating if the probe sequence should be copied to the hit information. */ void GetHitItem(int index, GDACSequenceHitItemType &hit, bool readProbe=false); /*! Friend to the top level class. */ friend class CBPMAPFileData; }; ////////////////////////////////////////////////////////////////////// /*! This class provides storage and reading capabilities for the BPMAP file. */ class CBPMAPFileData { public: /*! Constructor */ CBPMAPFileData(); /*! Destructor */ ~CBPMAPFileData(); protected: /*! The full path to the BPMAP file. */ std::string m_FileName; /*! The number of sequences stored in the BPMAP file. */ int m_NumberSequences; /*! The version number of the BPMAP file. */ float m_Version; /*! The file position of the start of the data. */ int m_DataStartPosition; /*! A string to hold error messages. */ std::string m_strError; /*! An array of sequence items - used when not memory mapping. */ std::vector m_SequenceItems; /*! Reads the file. * @param bReadHeaderOnly Flag to indicate if the header should only be read. * @return True if successful. */ bool ReadFile(bool bReadHeaderOnly=false); /*! Reads the file header * @return True if successful. */ bool ReadHeaderSection(); /*! Reads the file data (after the header). * @return True if successful. */ bool ReadDataSection(); /*! Pointer to a memory mapped file. */ void *m_lpFileMap; /*! Pointer to the data in the memory mapped file. */ char *m_lpData; #ifdef _MSC_VER /*! Windows handle to the file. */ HANDLE m_hFileMap; /*! Windows handle to the file. */ HANDLE m_hFile; #else /*! File pointer for memory mapping. */ FILE *m_fp; /*! The size of the file. */ int m_MapLen; #endif /*! Flag indicating if the file is open. */ bool m_bFileOpen; /*! Flag indicating if the file is memory mapped. */ bool m_bFileMapped; public: /*! 
Sets the full path to the BPMAP file. * @param name The file name. */ void SetFileName (const char *name) { m_FileName = name; } /*! Gets the full path of the file. * @return The full path of the file. */ std::string GetFileName() const { return m_FileName; } /*! Reads the contents of the file. * @return True if successful. */ bool Read(); /*! Reads the header of the file. * @return True if successful. */ bool ReadHeader(); /*! Checks if the file exists. * @return True if exists. */ bool Exists(); /*! Closes and deallocate memory. */ void Close(); /*! Gets an error string for file read errors. * @return An error string for file read errors. */ std::string GetError() const { return m_strError; } /*! Gets the number of sequences stored in the BPMAP file. * @return The number of sequences stored in the BPMAP file. */ int GetNumberSequences() const { return m_NumberSequences; } /*! The BPMAP file version number. * @return The BPMAP file version number. */ float GetVersion() const { return m_Version; } /*! Gets the sequence data. * @param index The index to the sequence of interest. * @param seq The information about a sequence. */ void GetSequenceItem(int index, CGDACSequenceItem &seq); }; //////////////////////////////////////////////////////////////////// } // namespace ////////////////////////////////////////////////////////////////////// #endif // !defined(AFX_BPMAPFILEDATA_H__601D7E2E_5897_4CAA_ABF0_D6002698CB78__INCLUDED_) affxparser/src/fusion/file/BPMAPFileWriter.cpp0000644000175200017520000005034014516003651022311 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2004 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/BPMAPFileWriter.h" // #include "file/FileIO.h" #include "file/FileWriter.h" // #include #include #include #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) #endif using namespace affxbpmap; using namespace affxbpmapwriter; #define BPMAP_TAG_VALUE_START "#" #define BPMAP_GROUP_NAME "#seq_group_name" #define BPMAP_VERSION_NAME "#version" /////////////////////////////////////////////////////////////////////////////// CGDACSequenceItemWriter::CGDACSequenceItemWriter() { m_ProbePairsValue = -1; m_HitNeedsRedoing = false; } /////////////////////////////////////////////////////////////////////////////// CGDACSequenceItemWriter::~CGDACSequenceItemWriter() { m_Hits.erase(m_Hits.begin(), m_Hits.end()); m_HitList.erase(m_HitList.begin(), m_HitList.end()); } /////////////////////////////////////////////////////////////////////////////// CGDACSequenceItemWriter CGDACSequenceItemWriter::copyMe() { CGDACSequenceItemWriter copy = CGDACSequenceItemWriter(); // copy over relevant items copy.m_GroupName = m_GroupName; copy.m_SeqVersion = m_SeqVersion; copy.m_Parameters = m_Parameters; return(copy); } /////////////////////////////////////////////////////////////////////////////// CBPMAPFileWriter::CBPMAPFileWriter() : CBPMAPFileData() { m_Version = BPMAP_VERSION; m_TpmapFileName =""; } /////////////////////////////////////////////////////////////////////////////// 
/*
 * Releases the file resources through Close() and clears the TPMAP file name.
 */
CBPMAPFileWriter::~CBPMAPFileWriter()
{
    Close();
    m_TpmapFileName="";
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Sets the path of the TPMAP (text) file to read.
 * str must be a valid C string; it is copied.
 */
void CBPMAPFileWriter::SetTpmapFileName(const char *str)
{
    m_TpmapFileName = str;
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Checks if the TPMAP file exists.
 * Returns true if stat() succeeds on the stored TPMAP file name.
 */
bool CBPMAPFileWriter::TpmapExists()
{
    // Find the file stats.
    struct stat st;
    return (stat(m_TpmapFileName.c_str(), &st) == 0);
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Splits lineBuffer on white space.
 * tokens is cleared first and then receives the tokens in input order.
 * Returns the number of tokens found (0 for an empty or all-blank line).
 *
 * NOTE(review): the element type of the list is missing from the parameter
 * declaration in this copy of the file; the body pushes std::string values,
 * so it is presumably a list of std::string - confirm against upstream.
 */
int tokenize(const std::string& lineBuffer, std::list& tokens){
    // gets tokens delimited by white space from lineBuffer into a list of tokens
    // returns the number of tokens found
    std::string token;
    std::stringstream ss(lineBuffer);
    tokens.clear();
    while (ss >> token) tokens.push_back(token);
    return( (int)tokens.size());
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Ordering used to sort sequence items: group name first, then sequence
 * version, then sequence name. Returns 1 if this item sorts before rhs,
 * 0 otherwise (including when all three fields are equal).
 */
int CGDACSequenceItemWriter::operator<(const CGDACSequenceItemWriter &rhs) const
{
    // sort on m_GroupName, then on m_SeqVersion, then on m_Name
    // Same group and version: the name decides.
    if( this->m_GroupName == rhs.m_GroupName && this->m_SeqVersion == rhs.m_SeqVersion && this->m_Name < rhs.m_Name)
        return 1;
    // Same group: the version decides.
    if( this->m_GroupName == rhs.m_GroupName && this->m_SeqVersion < rhs.m_SeqVersion)
        return 1;
    // Otherwise the group decides.
    if( this->m_GroupName < rhs.m_GroupName )
        return 1;
    return 0;
}

//////////////////////////////////////////////////////////////////////////////

/*
 * Handles a group-name tag line.
 * If tag equals BPMAP_GROUP_NAME, stores value as the group name, resets the
 * sequence name and version, and returns true. Any other tag is left
 * unprocessed and false is returned. The comparison is exact (case sensitive).
 */
bool CGDACSequenceItemWriter::processGroupName(const std::string& tag, const std::string& value)
{
    if( tag == BPMAP_GROUP_NAME) {
        m_GroupName = value;
        m_Name = "";
        m_SeqVersion = "";
        return (true);
    }
    return(false);
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Handles a version tag line.
 * If tag equals BPMAP_VERSION_NAME, stores value as the sequence version and
 * returns true; otherwise returns false without changing anything.
 */
bool CGDACSequenceItemWriter::processVersion(const std::string& tag, const std::string& value)
{
    if( tag == BPMAP_VERSION_NAME) {
        m_SeqVersion = value;
        return (true);
    }
    return(false);
}

///////////////////////////////////////////////////////////////////////////////
bool CGDACSequenceItemWriter::processGenericTagValue(const std::string& tag, const std::string& value) { TagValuePairType tagValue; if (tag.length() <=1){ // tag is a singleton # character, not permissible return( false ); } std::string nTag = tag.substr(1, tag.length() -1); // strip the leading "#" tagValue.Tag = nTag; tagValue.Value = value; m_Parameters.push_back(tagValue); return(true); } /////////////////////////////////////////////////////////////////////////////// #define DNA_BASE_A 0 #define DNA_BASE_C 1 #define DNA_BASE_G 2 #define DNA_BASE_T 3 #define DNA_BASE_OTHER 4 int acgt_to_int(char c, unsigned int *i) { switch(c) { case 'a': case 'A': *i = DNA_BASE_A; break; case 'c': case 'C': *i = DNA_BASE_C; break; case 'g': case 'G': *i = DNA_BASE_G; break; case 't': case 'T': *i = DNA_BASE_T; break; default: *i = DNA_BASE_OTHER; break; } if(*i == DNA_BASE_OTHER) return(EXIT_FAILURE); else return(EXIT_SUCCESS); } /////////////////////////////////////////////////////////////////////////////// /* ** d is a char array, should consist only of [acgtACGT] ** n_d is the length of d ** b is the array to which the byte form of the dna will be written ** n_b is the available number of bytes (so we can check if there is enough space) */ int dna_to_byte(const std::string& sequence, char *packedSequence) { unsigned short i,j,k; unsigned int base; int n_d = (int)sequence.length(); /* Number of bytes required - we pack 4 bases per byte */ /* n_bytes = (unsigned short) ceil(n_d/(float)4); */ for(i=0,j=0; i& tokens) { unsigned int val; float fval; GDACSequenceHitItemType hit; if (!(6 <= (int)tokens.size()) && ((int)tokens.size() <= 9)){ return( false ); } std::string sequence = tokens.front(); tokens.pop_front(); std::string strand = tokens.front(); tokens.pop_front(); std::string seqName = tokens.front(); tokens.pop_front(); std::string hitPosition = tokens.front(); tokens.pop_front(); std::string pmX = tokens.front(); tokens.pop_front(); std::string pmY = tokens.front(); 
tokens.pop_front(); std::string mmX; std::string mmY; // no match score in current TPMAPS std::string matchScore = "1.0"; // in the future a match score can be tacked onto the end of the line if (tokens.size() == 1) { matchScore = tokens.front(); tokens.pop_front(); } // check the seqName for consistency if (m_Name.size() == 0 ) m_Name = seqName; if (m_Name != seqName) { // this SequenceItemWriter instance is finished m_HitNeedsRedoing = true; return(true); } if (m_ProbePairsValue < 0) { // initialize if (tokens.size() >= 2) { // PM and MM present m_ProbePairsValue = PM_MM; } else { // PM only present m_ProbePairsValue = PM_ONLY; } } else { // test for consistency presence or absence of mismatch if ((m_ProbePairsValue == PM_MM) && (tokens.size() < 2)){ m_strError = "Too few tokens to process MM probes in PM/MM line"; return (false); } if ((m_ProbePairsValue == PM_ONLY) && (tokens.size() >= 2)){ m_strError = "Too many tokens left to process in PMOnly line"; return (false); } } if(m_ProbePairsValue == PM_MM){ mmX = tokens.front(); tokens.pop_front(); mmY = tokens.front(); tokens.pop_front(); } if (tokens.size() == 1) { matchScore = tokens.front(); tokens.pop_front(); } // 1. process the sequence memset(&hit.PackedPMProbe[0], 0, PROBE_STORAGE_BUFFER_LENGTH+1); char *packedSequence = &hit.PackedPMProbe[0]; int len = dna_to_byte(sequence, packedSequence); if (len == 0){ m_strError = "failed to pack sequence token: " + sequence; return(false); } // 2. process the probe length hit.ProbeLength = (int)sequence.size(); // 3. process the strand const char ch = strand[0]; if (strand.size() != 1){ m_strError = "Unrecognized strand token: " + strand; return(false); } switch(ch) { case 't': case 'T': case '1': case '+': hit.TopStrand = 1; break; case 'f': case 'F': case '0': case '-': hit.TopStrand = 0; break; default: m_strError = "Unrecognized strand token: " + strand; return( false ); } // 4. 
process the hitPosition if( sscanf(hitPosition.c_str(), "%u", &val) == EOF){ //error m_strError = "Unrecognized hitPosition: " + hitPosition; return(false); } hit.Position = val; // 5. process the PM X position if( sscanf(pmX.c_str(), "%u", &val) == EOF){ //error m_strError = "Unrecognized PM X: " + pmX; return(false); } hit.PMX = val; // 6. process the PM Y position if( sscanf(pmY.c_str(), "%u", &val) == EOF){ //error m_strError = "Unrecognized PM Y: " + pmY; return(false); } hit.PMY = val; // 7. process the match score if( sscanf(matchScore.c_str(), "%f", &fval) == EOF){ //error m_strError = "Unrecognized Match Score: " + matchScore; return(false); } hit.MatchScore = fval; if ( !( (0.0 <= hit.MatchScore) && (hit.MatchScore <= 1.0)) ) { //error m_strError = "Match Score must be at least 0.0 and no greater than 1.0: " + matchScore; return (false); } if ( m_ProbePairsValue == PM_MM ){ // 8. process the MM X position if( sscanf(mmX.c_str(), "%u", &val) == EOF){ //error m_strError = "Unrecognized MM X: " + mmX; return(false); } hit.MMX = val; // 9. process the PM Y position if( sscanf(mmY.c_str(), "%u", &val) == EOF){ //error m_strError = "Unrecognized MM Y: " + mmY; return(false); } hit.MMY = val; } m_HitList.push_front(hit); return(true); } /////////////////////////////////////////////////////////////////////////////// bool CBPMAPFileWriter::ReadTpmap() { // pseudo code // Make a GDACSequenceItem // Read in the first line // expect a format: // #group_name\tString // Read in second line // expect format: // #version\tString // other parameters have the form: // #name\tString // and must occur after the #version line. 
// Read in subsequent lines: Sequence\tStrand\tName\tPos\tX\tY\tX\tY // and make a GDACSequenceHitItemType and add to the array within // the GDACSequenceItem until we see the next // #group_name\tString // or EOF // sort by Group Name then by Version then by Name // within each set sort by position and then by strand m_ReadSucceeded = false; bool processingHitItems = true; //enter the while loop looking for a groupName tag // bool processingGenericTagValues = false; // unused bool processingTagValues = false; bool processingGroupName = true; bool processingVersion = true; std::string tag; std::string value; std::string lineBuffer; std::string token; m_strError = ""; std::ifstream tpMapFileStr; bool use_cin = (m_TpmapFileName.size() == 0); if(!use_cin) { // Open the file if set tpMapFileStr.open(m_TpmapFileName.c_str(), std::ios::in); if (!tpMapFileStr) { m_strError = "Unable to open " + m_TpmapFileName; return( false ); } } try { while (1) { if (use_cin) { if (!getline(std::cin, lineBuffer)) break; } else { if (!getline(tpMapFileStr, lineBuffer)) break; } std::list tokens; int ntokens = tokenize(lineBuffer, tokens); if (ntokens == 0) continue; // ignore lines with 0 tokens // ignore lines beginning with "#" followed by any white space if (tokens. 
front() == BPMAP_TAG_VALUE_START) continue; // set up the logic to process the line if (lineBuffer.substr(0,1) == BPMAP_TAG_VALUE_START){ // tag is present tag = tokens.front(); tokens.pop_front(); std::transform(tag.begin(), tag.end(), tag.begin(), tolower); if (ntokens == 1) { // tag is present with no value value = ""; } else { value = tokens.front(); } bool tagDone = false; if (tag == BPMAP_GROUP_NAME) { if (processingHitItems) { // should have just finished a block of probes (unless first time through) processingGroupName = true; processingVersion = false; processingTagValues = false; processingHitItems = false; tagDone = true; } else { //error m_strError = "Saw #group_name tag not at beginning of file or not after a set of probes in " + m_TpmapFileName; return(false); } } if (tag == BPMAP_VERSION_NAME){ // should have just processed the #seq_group_name tag if (processingGroupName) { processingVersion = true; processingGroupName = false; processingTagValues = false; processingHitItems = false; tagDone = true; } else { // error out m_strError = "Saw #version tag not following #group_name tag in " + m_TpmapFileName; return(false); } } if (!tagDone) { //should have previously processed the #version tag if (processingVersion) { processingTagValues = true; processingGroupName = false; processingHitItems = false; } else { //error out m_strError = "Saw a generic tag in line not following #version tag in " + m_TpmapFileName + ": " + lineBuffer; return(false); } } } else { // no tag was present, must be processing probe hit items processingHitItems = true; processingTagValues = false; processingGroupName = false; processingVersion = false; } if ( !(processingHitItems || processingTagValues || processingGroupName || processingVersion) ) { // error out m_strError = "Invalid input line: " + lineBuffer + " in " + m_TpmapFileName; return(false); } if (processingGroupName) { // make a new GDACSequenceItem m_SequenceItemList.push_front(CGDACSequenceItemWriter()); if 
(!m_SequenceItemList.front().processGroupName(tag, value)) { m_strError = "Expected to see #group_name, saw: " + lineBuffer + " in " + m_TpmapFileName; return( false ); } continue; } if (processingVersion && !processingTagValues) { if (!m_SequenceItemList.front().processVersion(tag, value)) { m_strError = "Expected to see #version, saw: " + lineBuffer + " in " + m_TpmapFileName; return( false ); } continue; } if (processingTagValues) { // processing tag/value pairs if (!m_SequenceItemList.front().processGenericTagValue(tag, value)) { m_strError = "Expected #tag/value pair, saw: " + lineBuffer + " in " + m_TpmapFileName; return( false ); } continue; } if (processingHitItems) { if (!m_SequenceItemList.front().processHitItem(tokens)){ m_strError = "Error processing sequence hit, line: " + lineBuffer + " in " + m_TpmapFileName + "\n" + m_SequenceItemList.front().GetError(); return( false ); } if( m_SequenceItemList.front().m_HitNeedsRedoing ){ // test to see whether this m_SequenceItemList.front() should be finished // this covers the case when a new SeqName needs to be created with // an ongoing Group Name and Version // make a new CGDACSequenceItemWriter ntokens = tokenize(lineBuffer, tokens); // remake the token list m_SequenceItemList.push_front(m_SequenceItemList.front().copyMe()); if (!m_SequenceItemList.front().processHitItem(tokens)){ //retry m_strError = "Error processing sequence hit, line: " + lineBuffer + " in " + m_TpmapFileName + "\n" + m_SequenceItemList.front().GetError(); return( false ); } } continue; } } //end while // now everything is in memory and we do the big sort m_SequenceItemList.sort(); std::list::iterator i; for(i=m_SequenceItemList.begin(); i != m_SequenceItemList.end(); ++i) { (*i).m_HitList.sort(); } m_ReadSucceeded = true; return( true ); } catch (std::string exceptionString) { m_strError = "Exception: " + exceptionString + " raised while reading input"; tpMapFileStr.close(); } return( false ); } bool CBPMAPFileWriter::WriteBpmap() { // 
write the BPMAP file header // iterate through all the GDACSequenceItem // write each Sequence Description // iterate through all the GDACSequenceItem and then GDACSequenceItemHitType // write position information until the Group name/Version changes // no BPMAP footer if(!m_ReadSucceeded) { m_strError = "No data to write."; return ( false ); } std::ofstream of; of.open(m_FileName.c_str(), std::ios::out | std::ios::binary); // check if open if(!of) { m_strError = "Unable to open file " + m_FileName + " for output."; return ( false ); } try { // This file is written in network (big-endian) format // Version is 3.0 // Write File Header (Section 1) std::string magic = BPMAP_FILE_HEADER_BYTES; WriteFixedString(of, magic, BPMAP_FILE_HEADER_LEN); // 1.1 WriteFloat_N(of, m_Version); // 1.2 WriteUInt32_N(of, (uint32_t)m_SequenceItemList.size()); // 1.3 // Write blocks of Sequence description (Section 2) unsigned int offsetPlaceHolder = 0; std::list::iterator i; for(i=m_SequenceItemList.begin(); i != m_SequenceItemList.end(); ++i) { WriteString_N(of, (*i).m_Name); // 2.1, 2.2 WriteUInt32_N(of, (*i).m_ProbePairsValue);// 2.3 (*i).m_offsetPosition = of.tellp(); WriteUInt32_N(of, offsetPlaceHolder); // 2.4 WriteUInt32_N(of, (uint32_t)(*i).m_HitList.size()); // 2.5 WriteString_N(of, (*i).m_GroupName); // 2.6, 2.7 WriteString_N(of, (*i).m_SeqVersion); // 2.8, 2.9 WriteUInt32_N(of, (uint32_t)(*i).m_Parameters.size());// 2.10 TagValuePairTypeVector::iterator j; for(j=(*i).m_Parameters.begin(); j != (*i).m_Parameters.end(); ++j) { WriteString_N(of, (*j).Tag); // 2.10.1, 2.10.2 WriteString_N(of, (*j).Value); // 2.10.3, 2.10.4 } } // Write blocks of Probe-sequence mappings (Section 3) unsigned int sequenceIndex = 0; for(i=m_SequenceItemList.begin(); i != m_SequenceItemList.end(); ++i) { (*i).m_HitStartPosition = of.tellp(); WriteUInt32_N(of, sequenceIndex); // 3.1.1 SequenceID (really index) sequenceIndex++; std::list::iterator j; for(j=(*i).m_HitList.begin(); j != 
(*i).m_HitList.end(); ++j) { WriteUInt32_N(of, (*j).PMX); // 3.2.1 WriteUInt32_N(of, (*j).PMY); // 3.2.2 if((*i).m_ProbePairsValue == PM_MM){ WriteUInt32_N(of, (*j).MMX); // 3.2.3 WriteUInt32_N(of, (*j).MMY); // 3.2.4 } WriteUInt8(of, (*j).ProbeLength); // 3.2.5 WriteCharacterArray(of, (*j).PackedPMProbe, PROBE_STORAGE_BUFFER_LENGTH); // 3.2.6 WriteFloat_N(of, (*j).MatchScore); // 3.2.7 WriteUInt32_N(of, (*j).Position); // 3.2.8 WriteUInt8(of, (*j).TopStrand); // 3.2.9 } } // Write offsets into file for(i=m_SequenceItemList.begin(); i != m_SequenceItemList.end(); ++i) { of.seekp((*i).m_offsetPosition); WriteUInt32_N(of, (*i).m_HitStartPosition); // 2.4 fixup } of.close(); return( true ); } catch (std::string exceptionString) { m_strError = "Exception: " + exceptionString + " raised while writing output file " + m_FileName; of.close(); } return( false ); } affxparser/src/fusion/file/BPMAPFileWriter.h0000644000175200017520000002517114516003651021762 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2004 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _BPMAPFILEWRITER_H_ #define _BPMAPFILEWRITER_H_ /*! \file BPMAPFileWriter.h This file provides BPMAP file writing capabilities. 
*/ #include "file/BPMAPFileData.h" // #include // namespace affxbpmapwriter { #define BPMAP_VERSION 3.0 /////////////////////////////////////////////////////////////////////////////// /// affxbpmapwriter::CBPMAPFileWriter /// /// \brief BPMAP file writer object /// /// Input: tpmap file or if unset, standard input /* Output is a bpmap file; cannot be piped to standard out INTRODUCTION The BPMAP (binary probe map) file format is used to store the relationship between (PM,MM) probe pairs and positions on a set of sequences of interest. The format is binary with data stored in big-endian format. OVERVIEW OF FILE FORMAT The following lists the sections and their order and placement in the file. The definition of each section is detailed below. We assume there are N sequences and M_i probe pairs mapping to sequence i (for i = 1 to N). 1 File Header 2 Sequence descriptions: + Sequence Description for sequence #1 + Sequence Description for sequence #2 + ... + Sequence Description for sequence #N 3 Probe-sequence mappings: + Probe-sequence mapping for sequence #1: - Sequence ID for sequence #1 - Position Information for probe pair #1 of sequence #1 - Position Information for probe pair #2 of sequence #1 - ... - Position Information for probe pair #M_1 of sequence #1 + Probe-sequence mapping for sequence #2: - Sequence ID for sequence #2 - Position Information for probe pair #1 of sequence #2 - Position Information for probe pair #2 of sequence #2 - ... - Position Information for probe pair #M_2 of sequence #2 + ... + Probe-sequence mapping for sequence #N: - Sequence ID for sequence #N - Position Information for probe pair #1 of sequence #N - Position Information for probe pair #2 of sequence #N - ... 
- Position Information for probe pair #M_N of sequence #N 4 File footer DETAILS OF FILE FORMAT 1 File Header 1.1 The first 8 bytes always contain the following values, to identify the file: (octal) 120 110 124 067 015 012 032 012 (hexadecimal) 50 48 54 37 0d 0a 1a 0a (ASCII C notation) P H T 7 \r \n \032 \n 1.2 Version number, float. Valid versions are 1.0, 2.0 and 3.0. (4 bytes) 1.3 N, the number of sequences, stored as an uint32_t (4 bytes). 2 Sequence Descriptions - the next N blocks (one for each sequence) respect the following format: Note that the N blocks are required to be in a specific order, to enable fast searches. The ordering of sequences is defined by: Lex sort on sequence group name (version >= 2.0 only), Lex sort on sequence version (version >= 2.0 only), Lex sort on Sequence name 2.1 Length of the sequence name, uint32_t (4 bytes) 2.2 Sequence name stored as a sequence of chars, length specified by 2.1 above. ---- Items 2.3-2.4 are only present in versions >= 3.0 of the format ---- 2.3 Probe Mapping type, unsigned int (4 bytes) 0 indicates a (PM,MM) probe pair tiling across the sequence. 1 indicates a PM-only tiling across the sequence. 2.4 Sequence file offset, unsigned int (4 bytes) The offset (in bytes) of the probe-sequence mapping info in section 3, after the end of the sequence description section. Intended to enable fast look-up ability. 
---- End version >= 3.0 specific section ---- 2.5 Number of probes/probe pairs in the sequence, uint32_t (4 bytes) ---- Items 2.6 through 2.10 are only included if the version is >= 2.0 ---- 2.6 Length of the sequence group name, uint32_t (4 bytes) 2.7 Sequence group name (such as the organism to which the sequence belongs) stored as chars, length specified by 2.6 2.8 Length of the sequence version, uint32_t (4 bytes) 2.9 Sequence version stored as chars, length specified by 2.8 2.10 Number of meta-data tag-value pairs, uint32_t (4 bytes) For each meta-data tag-value pair we store the following: 2.10.1 Length of key in meta data block, uint32_t (4 bytes) 2.10.2 Key in meta data block stored as chars, length specified by 2.10.1 2.10.3 Length of value in meta data block, uint32_t (4 bytes) 2.10.4 Value in meta data block stored as chars, length specified by 2.10.3 ---- End version >= 2.0 specific section ---- 3 Probe-sequence mappings - the next N blocks (one for each sequence) respect the following format: 3.1 Header info specific to the sequence 3.1.1 Sequence ID, uint32_t (4 bytes) 3.2 The next M_i blocks (one for each probe or probe pair aligning to the sequence, from 2.3) respect the format below. Note that the N blocks are required to be in a specific order, to enable fast searches. 
The ordering is defined as follows:
            Numeric sort on position
            Numeric sort on strand indicator
            Numeric sort on X coordinate of PM probe
            Numeric sort on Y coordinate of PM probe
            Numeric sort on X coordinate of MM probe (if there is an MM)
            Numeric sort on Y coordinate of MM probe (if there is an MM)
      3.2.1 X coordinate (0-based) on array of the PM probe as uint32_t (4 bytes)
      3.2.2 Y coordinate (0-based) on array of the PM probe as uint32_t (4 bytes)
      --- in versions >= 3.0 the next two sections are skipped if the mapping type
          is PM-only, as indicated in 2.3 ---
      3.2.3 X coordinate (0-based) on array of the MM probe as uint32_t (4 bytes)
      3.2.4 Y coordinate (0-based) on array of the MM probe as uint32_t (4 bytes)
      3.2.5 Length of the PM (and MM if a probe pair) as unsigned char (1 byte)
      3.2.6 DNA Probe sequence. The probe sequence is packed into a 7 byte
            sequence of chars. Each byte represents up to 4 bases (so the format
            can handle probes of length up to 27bp). The first byte contains the
            first 4 bases of the probe. The first base of the probe is encoded in
            the two most significant bits of the first byte. The fourth base of
            the probe is encoded in the two least significant bits of the first
            byte. The conversion from each pair of bits to a DNA base is as
            follows: (0,1,2,3) -> (A,C,G,T)
      3.2.7 Match score as float (4 bytes). The current BPMAP files are based on
            perfect 25-mer alignment so the scores are 1.
      3.2.8 Position of PM probe within the sequence as uint32_t (4 bytes). The
            position is the 0-based position of the lower coordinate of the
            25-mer aligned to the target.
      3.2.9 Strand indicator as unsigned char (1 byte). Value is 1 if the
            matching target (not the probe) is on the forward strand, 0 if on
            the reverse.
*/
///////////////////////////////////////////////////////////////////////////////

/*! One sequence entry while a TPMAP file is being read: extends
 * affxbpmap::CGDACSequenceItem with the parsing state and the hit list that
 * CBPMAPFileWriter (a friend) fills in and later writes out.
 */
class CGDACSequenceItemWriter : public affxbpmap::CGDACSequenceItem
{
public:
  /*! Constructor */
  CGDACSequenceItemWriter();

  /*! Destructor */
  ~CGDACSequenceItemWriter();

  /*! Gets an error string for file read errors.
   * @return An error string for file read errors.
   */
  std::string GetError() const { return m_strError; }

  /*! Make a copy with specific fields set */
  CGDACSequenceItemWriter copyMe();

protected:
  /*! A string to hold error messages. */
  std::string m_strError;

  /*! offset into the BPMAP file for writing position of data blocks of hits */
  std::streampos m_offsetPosition;

  /*! flag to see whether this instance needs to be redone */
  bool m_HitNeedsRedoing;

  /*! int for PM_ONLY or PM_MM in sequence */
  int m_ProbePairsValue;

public: //dont want to make this public but can't figure how
  /*! Comparison operator used by sort */
  int operator<(const CGDACSequenceItemWriter &rhs) const;

protected:
  /*! List of hits for efficiency in file processing */
  // NOTE(review): the list's template argument appears to have been lost in
  // this copy of the file - restore it from the upstream header.
  std::list m_HitList;

  /*! Code to process a text line beginning with the Group Name tag */
  bool processGroupName(const std::string&, const std::string&);

  /*! Code to process a text line beginning with the Version tag */
  bool processVersion(const std::string&, const std::string&);

  /*! Code to process a text line beginning with any other tag */
  bool processGenericTagValue(const std::string&, const std::string&);

  /*! Code to process a Sequence Hit text line */
  // NOTE(review): the parameter's template argument is missing here as well.
  bool processHitItem(std::list& );

  friend class CBPMAPFileWriter;
};

/*! Reads a text TPMAP file into memory and writes it out as a binary BPMAP
 * file.
 */
class CBPMAPFileWriter : public affxbpmap::CBPMAPFileData
{
public:
  /*! Constructor */
  CBPMAPFileWriter();

  /*! Destructor */
  ~CBPMAPFileWriter();

protected:
  /*! The full path to the input TPMAP file */
  std::string m_TpmapFileName;

  /*! List of sequence items */
  // NOTE(review): template argument missing in this copy; ReadTpmap() stores
  // CGDACSequenceItemWriter objects in this list.
  std::list m_SequenceItemList;

  /*! Read succeeded */
  bool m_ReadSucceeded;

public:
  /*! Gets the full path of the TPMAP file.
   * @return The full path of the TPMAP file.
   */
  std::string GetTpmapFileName() const { return m_TpmapFileName; }

  /*! Sets the input TPMAP file name, if not set ReadTpmap uses stdin
   * @param str The input TPMAP file name (full path).
   */
  void SetTpmapFileName(const char *str);

  /*! Writes a version 3 binary BPMAP file. Requires a preceding successful
   * ReadTpmap(); otherwise nothing is written and false is returned.
   * @return True if successful
   */
  bool WriteBpmap();

  /*! Tests whether the input TPMAP file exists
   * @return True if successful
   */
  bool TpmapExists();

  /*! Reads the TPMAP file, or standard input when no file name has been set
   * @return True if successful
   */
  bool ReadTpmap();
};

//////////////////////////////////////////////////////////////////////

} // namespace

////////////////////////////////////////////////////////////////////

#endif // _BPMAPFILEWRITER_H_
affxparser/src/fusion/file/CDFFileData.cpp0000644000175200017520000011513314516003651021445 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

//
#include "file/CDFFileData.h"
//
#include "file/FileIO.h"
//
#include "portability/affy-base-types.h"
//
// NOTE(review): the operands of the following #include directives (and of the
// one in the _MSC_VER guard below) are missing in this copy of the file -
// restore the header names from the upstream source.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
//

#ifndef _MSC_VER
#include
#endif

#ifdef _MSC_VER
#pragma warning(disable: 4996) // don't show deprecated warnings.
#endif #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif using namespace affxcdf; ////////////////////////////////////////////////////////////////////// // ascii(67) == 0x43 ==> 'C' #define CDF_FILE_MAGIC_NUMBER 67 #define CDF_FILE_VERSION_NUMBER 4 ////////////////////////////////////////////////////////////////////// CCDFFileHeader::CCDFFileHeader() : m_Magic(0), m_Version(0), m_NumProbeSets(0), m_NumQCProbeSets(0), m_Cols(0), m_Rows(0) { } ////////////////////////////////////////////////////////////////////// CCDFFileData::CCDFFileData() { } ////////////////////////////////////////////////////////////////////// CCDFFileData::~CCDFFileData() { Close(); } ////////////////////////////////////////////////////////////////////// CCDFProbeInformation::CCDFProbeInformation() : m_ListIndex(0), m_Expos(0), m_X(0), m_Y(0), m_PBase(' '), m_TBase(' '), m_ProbeLength(0), m_ProbeGrouping(0) { } ////////////////////////////////////////////////////////////////////// CCDFProbeGroupInformation::CCDFProbeGroupInformation() : m_NumLists(0), m_NumCells(0), m_Start(0), m_Stop(0), m_ProbeSetIndex(0), m_GroupIndex(0), m_WobbleSituation(0), m_AlleleCode(0), m_Channel(0), m_RepType(0), m_NumCellsPerList(0), m_Direction(0), m_pCells(NULL) { } ////////////////////////////////////////////////////////////////////// CCDFProbeGroupInformation::~CCDFProbeGroupInformation() { m_Cells.clear(); } ////////////////////////////////////////////////////////////////////// void CCDFProbeGroupInformation::MakeShallowCopy(CCDFProbeGroupInformation &orig) { m_NumLists = orig.m_NumLists; m_NumCells = orig.m_NumCells; m_Start = orig.m_Start; m_Stop = orig.m_Stop; m_ProbeSetIndex = orig.m_ProbeSetIndex; m_GroupIndex = orig.m_GroupIndex; m_Name = orig.m_Name; m_NumCellsPerList = orig.m_NumCellsPerList; m_Direction = orig.m_Direction; m_WobbleSituation = orig.m_WobbleSituation; m_AlleleCode = orig.m_AlleleCode; m_Channel = orig.m_Channel; m_RepType = orig.m_RepType; m_pCells = &orig.m_Cells; } 
////////////////////////////////////////////////////////////////////// void CCDFProbeGroupInformation::GetCell(int cell_index, CCDFProbeInformation & info) { info = (*m_pCells)[cell_index]; } ////////////////////////////////////////////////////////////////////// CCDFQCProbeInformation::CCDFQCProbeInformation() : m_X(0), m_Y(0), m_PLen(0), m_PMProbe(0), m_Background(0) { } ////////////////////////////////////////////////////////////////////// CCDFQCProbeSetInformation::CCDFQCProbeSetInformation() : m_NumCells(0), m_QCProbeSetType(UnknownQCProbeSetType), m_pCells(NULL) { } ////////////////////////////////////////////////////////////////////// CCDFQCProbeSetInformation::~CCDFQCProbeSetInformation() { m_Cells.erase(m_Cells.begin(), m_Cells.end()); } ////////////////////////////////////////////////////////////////////// void CCDFQCProbeSetInformation::MakeShallowCopy(CCDFQCProbeSetInformation &orig) { m_NumCells = orig.m_NumCells; m_QCProbeSetType = orig.m_QCProbeSetType; m_pCells = &orig.m_Cells; } ////////////////////////////////////////////////////////////////////// void CCDFQCProbeSetInformation::GetProbeInformation(int index, CCDFQCProbeInformation & info) { info = (*m_pCells)[index];; } ////////////////////////////////////////////////////////////////////// CCDFProbeSetInformation::CCDFProbeSetInformation() : m_NumLists(0), m_NumGroups(0), m_NumCells(0), m_Index(0), m_ProbeSetNumber(0), m_ProbeSetType(UnknownProbeSetType), m_Direction(NoDirection), m_NumCellsPerList(0), m_pGroups(NULL) { } ////////////////////////////////////////////////////////////////////// CCDFProbeSetInformation::~CCDFProbeSetInformation() { m_Groups.erase(m_Groups.begin(), m_Groups.end()); } ////////////////////////////////////////////////////////////////////// void CCDFProbeSetInformation::MakeShallowCopy(CCDFProbeSetInformation &orig) { m_NumLists = orig.m_NumLists; m_NumGroups = orig.m_NumGroups; m_NumCells = orig.m_NumCells; m_Index = orig.m_Index; m_ProbeSetNumber = orig.m_ProbeSetNumber; 
m_ProbeSetType = orig.m_ProbeSetType; m_Direction = orig.m_Direction; m_NumCellsPerList = orig.m_NumCellsPerList; m_pGroups = &orig.m_Groups; } ////////////////////////////////////////////////////////////////////// void CCDFProbeSetInformation::GetGroupInformation(int index, CCDFProbeGroupInformation & info) { info.MakeShallowCopy((*m_pGroups)[index]); } ////////////////////////////////////////////////////////////////////// CCDFProbeSetNames::CCDFProbeSetNames() { } ////////////////////////////////////////////////////////////////////// CCDFProbeSetNames::~CCDFProbeSetNames() { Clear(); } ////////////////////////////////////////////////////////////////////// void CCDFProbeSetNames::Clear() { m_ProbeSetNames.erase(m_ProbeSetNames.begin(), m_ProbeSetNames.end()); } ////////////////////////////////////////////////////////////////////// std::string CCDFProbeSetNames::GetName(int index) const { return m_ProbeSetNames[index]; } ////////////////////////////////////////////////////////////////////// void CCDFProbeSetNames::Resize(int size) { m_ProbeSetNames.resize(size); } ////////////////////////////////////////////////////////////////////// void CCDFProbeSetNames::SetName(int index, std::string name) { m_ProbeSetNames[index] = name; } ////////////////////////////////////////////////////////////////////// std::string CCDFFileData::GetProbeSetName(int index) { if (iteratorReader.is_open() == false) { return m_ProbeSetNames.GetName(index); } int loc = (int)probeSetNamePos + (index*MAX_PROBE_SET_NAME_LENGTH); seekg(loc, std::ios::beg); char name[MAX_PROBE_SET_NAME_LENGTH + 1]; ReadFixedCString(iteratorReader, name, MAX_PROBE_SET_NAME_LENGTH); return name; } ////////////////////////////////////////////////////////////////////// void CCDFFileData::Close() { if (iteratorReader.is_open() == true) iteratorReader.close(); m_ProbeSets.clear(); m_QCProbeSets.clear(); m_ProbeSetNames.Clear(); } ////////////////////////////////////////////////////////////////////// bool 
CCDFFileData::IsXDACompatibleFile() { // Open the file. std::ifstream instr(m_FileName.c_str(), std::ios::in | std::ios::binary); if (!instr) return false; // Read the magic number from the file. int32_t magic=0; ReadInt32_I(instr, magic); bool bXDAFile = (magic == CDF_FILE_MAGIC_NUMBER); instr.close(); return bXDAFile; } ////////////////////////////////////////////////////////////////////// std::string CCDFFileData::GetGUID() { std::string guid = ""; bool bXDAFile = IsXDACompatibleFile(); if (m_Header.GetFormatVersion() == 0) { if (!ReadHeader()) return guid; } if ((bXDAFile && (m_Header.GetFormatVersion() >= 4)) || (!bXDAFile && (m_Header.GetFormatVersion() >= 6))) { guid = m_Header.GetGUID(); } return guid; } ////////////////////////////////////////////////////////////////////// std::string CCDFFileData::GetIntegrityMd5() { std::string integrityMd5 = ""; bool bXDAFile = IsXDACompatibleFile(); if (m_Header.GetFormatVersion() == 0) { if (!ReadHeader()) return integrityMd5; } if ((bXDAFile && (m_Header.GetFormatVersion() >= 4)) || (!bXDAFile && (m_Header.GetFormatVersion() >= 6))) { integrityMd5 = m_Header.GetIntegrityMd5(); } return integrityMd5; } ////////////////////////////////////////////////////////////////////// std::string CCDFFileData::GetChipType() { std::string chiptype; if (m_FileName.empty() == false) { bool bXDAFile = IsXDACompatibleFile(); if (m_Header.GetFormatVersion() == 0) { if (!ReadHeader()) return chiptype; } if ((bXDAFile && (m_Header.GetFormatVersion() < 4)) || (!bXDAFile && (m_Header.GetFormatVersion() < 6))) { int index = (int) m_FileName.rfind('\\'); if (index == -1) index = (int) m_FileName.rfind('/'); chiptype = m_FileName.c_str() + index + 1; chiptype.resize(chiptype.length()-4); } else { chiptype = m_Header.GetChipType(); } } return chiptype; } ////////////////////////////////////////////////////////////////////// std::vector CCDFFileData::GetChipTypes() { std::vector chiptypes; std::string chiptype; if (m_FileName.empty() == false) { 
bool bXDAFile = IsXDACompatibleFile(); if (m_Header.GetFormatVersion() == 0) { if (!ReadHeader()) return chiptypes; } if ((bXDAFile && (m_Header.GetFormatVersion() < 4)) || (!bXDAFile && (m_Header.GetFormatVersion() < 6))) { int index = (int) m_FileName.rfind('\\'); if (index == -1) index = (int) m_FileName.rfind('/'); chiptype = m_FileName.c_str() + index + 1; chiptype.resize(chiptype.length()-4); // The full file name (minus .cdf extension) is the default (1st) // chip type. This matches what GetChipType() returns. // ie: foo.bar.v1.r2.cdf -> foo.bar.v1.r2 chiptypes.push_back(chiptype); //We then add all substrings starting at zero and ending at '.' // ie: foo.bar.v1.r2.cdf -> foo.bar.v1, foo.bar, foo // ie: chiptypes contain foo.bar.v1.r2, foo.bar.v1, foo.bar, foo std::string::size_type pos = chiptype.rfind(".",chiptype.size()-1); while (pos != std::string::npos){ if(pos>0) chiptypes.push_back(chiptype.substr(0,pos)); pos = chiptype.rfind(".",pos-1); } } else { chiptypes = m_Header.GetChipTypes(); } } return chiptypes; } ////////////////////////////////////////////////////////////////////// bool CCDFFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_FileName.c_str(), &st) == 0); } ////////////////////////////////////////////////////////////////////// bool CCDFFileData::ReadHeader() { // Read the header, close if failed. readHeaderOnly = true; if (Open() == false) { Close(); return false; } return true; } ////////////////////////////////////////////////////////////////////// GeneChipProbeSetType CCDFFileData::GetProbeSetType(int index) { if (iteratorReader.is_open() == false) { return m_ProbeSets[index].GetProbeSetType(); } uint32_t i_pos; uint32_t p_pos; // Get the probe set position from the index part of the file then seek to it. i_pos=(uint32_t) probeSetIndexPos + (index*sizeof(uint32_t)); iteratorReader.seekg(i_pos, std::ios::beg); // the seekg method will be called later. 
ReadUInt32_I(iteratorReader,p_pos); // skip to the record seekg(p_pos, std::ios::beg); // Now grab the probeset type. uint16_t usval; ReadUInt16_I(iteratorReader, usval); return (GeneChipProbeSetType)(usval); } ////////////////////////////////////////////////////////////////////// void CCDFFileData::GetProbeSetInformation(int index, CCDFProbeSetInformation & info) { if (iteratorReader.is_open() == false) { info.MakeShallowCopy(m_ProbeSets[index]); return; } uint32_t i_pos; uint32_t p_pos; // Is this a sequential read? // Are we just reading the next record? // If so no, need to seek around. // NOTE: Of course this expects the records to be next to each other. // |Rec:1......|Rec:2......|Rec:..... if ((m_probeSetIndex_last_valid!=1)||(index!=m_probeSetIndex_last+1)) { // Nope! Look it it in the index. i_pos = (uint32_t)probeSetIndexPos + (index*sizeof(uint32_t)); iteratorReader.seekg(i_pos, std::ios::beg); ReadUInt32_I(iteratorReader, p_pos); // now seek to the probe pos. iteratorReader.seekg(p_pos,std::ios::beg); } // remember which index we just read. 
m_probeSetIndex_last_valid=1; m_probeSetIndex_last=index; // Read the data uint16_t usval; uint8_t ucval; int32_t ival; info.m_Index = index; ReadUInt16_I(iteratorReader, usval); info.m_ProbeSetType = usval; ReadUInt8(iteratorReader, ucval); info.m_Direction = ucval; ReadInt32_I(iteratorReader, ival); info.m_NumLists = ival; ReadInt32_I(iteratorReader, ival); info.m_NumGroups = ival; ReadInt32_I(iteratorReader, ival); info.m_NumCells = ival; ReadInt32_I(iteratorReader, ival); info.m_ProbeSetNumber = ival; ReadUInt8(iteratorReader, ucval); info.m_NumCellsPerList = ucval; // Read the Groups CCDFProbeGroupInformation *pBlk; info.m_Groups.resize(info.m_NumGroups); info.m_pGroups = &info.m_Groups; for (int j=0; jm_GroupIndex = j; // Group info ReadInt32_I(iteratorReader, ival); pBlk->m_NumLists = ival; ReadInt32_I(iteratorReader, ival); pBlk->m_NumCells = ival; ReadUInt8(iteratorReader, ucval); pBlk->m_NumCellsPerList = ucval; ReadUInt8(iteratorReader, ucval); pBlk->m_Direction = ucval; ReadInt32_I(iteratorReader, ival); pBlk->m_Start = ival; ReadInt32_I(iteratorReader, ival); pBlk->m_Stop = ival; ReadFixedString(iteratorReader, pBlk->m_Name, MAX_PROBE_SET_NAME_LENGTH); if (m_Header.m_Version >= 2) { ReadUInt16_I(iteratorReader, usval); pBlk->m_WobbleSituation = usval; ReadUInt16_I(iteratorReader, usval); pBlk->m_AlleleCode = usval; } if (m_Header.m_Version >= 3) { ReadUInt8(iteratorReader, ucval); pBlk->m_Channel = ucval; ReadUInt8(iteratorReader, ucval); pBlk->m_RepType = ucval; } // Read the cells CCDFProbeInformation *pCell; pBlk->m_Cells.resize(pBlk->m_NumCells); pBlk->m_pCells = &pBlk->m_Cells; for (int k=0; km_NumCells; k++) { pCell = &pBlk->m_Cells[k]; // Cell info. 
ReadInt32_I(iteratorReader, ival); pCell->m_ListIndex = ival; ReadUInt16_I(iteratorReader, usval); pCell->m_X = usval; ReadUInt16_I(iteratorReader, usval); pCell->m_Y = usval; ReadInt32_I(iteratorReader, ival); pCell->m_Expos = ival; ReadUInt8(iteratorReader,ucval); pCell->m_PBase = ucval; ReadUInt8(iteratorReader,ucval); pCell->m_TBase = ucval; if (k==0) pBlk->m_Start = pCell->m_ListIndex; else if (k == pBlk->m_NumCells-1) pBlk->m_Stop = pCell->m_ListIndex; if (m_Header.m_Version >= 2) { ReadUInt16_I(iteratorReader, usval); pCell->m_ProbeLength = usval; ReadUInt16_I(iteratorReader, usval); pCell->m_ProbeGrouping = usval; } } } } ////////////////////////////////////////////////////////////////////// void CCDFFileData::GetQCProbeSetInformation(GeneChipQCProbeSetType qcType, CCDFQCProbeSetInformation & info) { bool bFound = false; for (int i=0; i CDF_FILE_VERSION_NUMBER) { m_strError = "The file does not appear to be the correct format."; return false; } // Read guid, integrity md5, and chip type if format version is 4 or above if (m_Header.m_Version >= 4) { ReadString_I(iteratorReader, m_Header.m_GUID); ReadFixedString(iteratorReader, m_Header.m_IntegrityMd5, INTEGRITY_MD5_LENGTH); uint8_t numChipTypes; std::string chiptype; m_Header.m_ChipType = ""; m_Header.m_ChipTypes.clear(); ReadUInt8(iteratorReader, numChipTypes); for (uint8_t count = 0; count < numChipTypes; count++) { ReadString_I(iteratorReader, chiptype); m_Header.m_ChipTypes.push_back(chiptype); if ((m_Header.m_ChipType.empty() == true) && (chiptype.find(".") == std::string::npos)) { m_Header.m_ChipType = chiptype; } } if (m_Header.m_ChipTypes.size() == 0) { m_strError = "The file does not contain chip types."; return false; } else if (m_Header.m_ChipType.empty() == true) { chiptype = m_Header.m_ChipTypes.at(0); std::string::size_type pos = chiptype.rfind(".", chiptype.size() - 1); while (pos != std::string::npos) { if(pos > 0) m_Header.m_ChipType = chiptype.substr(0, pos); pos = chiptype.rfind(".", pos - 
1); } } } // Read the remaining header. uint16_t uval; ReadUInt16_I(iteratorReader, uval); m_Header.m_Cols = uval; ReadUInt16_I(iteratorReader, uval); m_Header.m_Rows = uval; ReadInt32_I(iteratorReader, ival); m_Header.m_NumProbeSets = ival; ReadInt32_I(iteratorReader, ival); m_Header.m_NumQCProbeSets = ival; ReadString_I(iteratorReader,m_Header.m_Reference); return true; } ////////////////////////////////////////////////////////////////////// bool CCDFFileData::ReadXDAFormat() { // Open the file. iteratorReader.open(m_FileName.c_str(), std::ios::in | std::ios::binary); // Check if open if (!iteratorReader) { m_strError = "Unable to open the file."; return false; } // Read the header. if (ReadXDAHeader() == false) return false; // Save the probe set name position probeSetNamePos = iteratorReader.tellg(); // Skip the probe set names seekg(MAX_PROBE_SET_NAME_LENGTH * m_Header.m_NumProbeSets, std::ios::cur); // remember the start of the qc index qcSetIndexPos = iteratorReader.tellg(); // Skip it seekg(m_Header.m_NumQCProbeSets * sizeof(uint32_t), std::ios::cur); // remember the start of the probeset index probeSetIndexPos = iteratorReader.tellg(); // invalidate m_probeSetIndex_last_valid=0; return true; } ////////////////////////////////////////////////////////////////////// bool CCDFFileData::ReadTextFormat() { // Open the file. std::ifstream instr; instr.open(m_FileName.c_str(), std::ios::in); // Check if open if (!instr) { m_strError = "Unable to open the file."; return false; } const int MAXLINELENGTH = 4096; char str[MAXLINELENGTH]; char *subStr; char *buffer = NULL; const char *CDFVERSION1 = "GC1.0"; const char *CDFVERSION2 = "GC2.0"; const char *CDFVERSION3 = "GC3.0"; const char *CDFVERSION4 = "GC4.0"; const char *CDFVERSION5 = "GC5.0"; const char *CDFVERSION6 = "GC6.0"; // Get the CDF section. ReadNextLine(instr, str, MAXLINELENGTH); if (strncmp( str, "[CDF]", 5) != 0) { m_strError = "Unknown file format."; return false; } // Get the version number. 
ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; if ( strncmp( subStr, CDFVERSION1, strlen(CDFVERSION1)) == 0) m_Header.m_Version = 1; else if ( strncmp( subStr, CDFVERSION2, strlen(CDFVERSION2)) == 0) m_Header.m_Version = 2; else if ( strncmp( subStr, CDFVERSION3, strlen(CDFVERSION3)) == 0) m_Header.m_Version = 3; else if ( strncmp( subStr, CDFVERSION4, strlen(CDFVERSION4)) == 0) m_Header.m_Version = 4; else if ( strncmp( subStr, CDFVERSION5, strlen(CDFVERSION5)) == 0) m_Header.m_Version = 5; else if ( strncmp( subStr, CDFVERSION6, strlen(CDFVERSION6)) == 0) m_Header.m_Version = 6; if (m_Header.m_Version >= 6) { // Get the guid. ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; m_Header.m_GUID = subStr; // Get the integrity md5. ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; m_Header.m_IntegrityMd5 = subStr; } // Get the next section. ReadNextLine(instr, str, MAXLINELENGTH); // [Chip] ReadNextLine(instr, str, MAXLINELENGTH); // name if (m_Header.m_Version >= 6) { std::string chiptype; m_Header.m_ChipType = ""; m_Header.m_ChipTypes.clear(); ReadNextLine(instr, str, MAXLINELENGTH); // chiptype while (strncmp(str, "ChipType=", 9) == 0) { chiptype = strchr(str, '=')+1; m_Header.m_ChipTypes.push_back(chiptype); if ((m_Header.m_ChipType.empty() == true) && (chiptype.find(".") == std::string::npos)) { m_Header.m_ChipType = chiptype; } ReadNextLine(instr, str, MAXLINELENGTH); } if (m_Header.m_ChipType.empty() == true) { chiptype = m_Header.m_ChipTypes.at(0); std::string::size_type pos = chiptype.rfind(".", chiptype.size() - 1); while (pos != std::string::npos) { if(pos > 0) m_Header.m_ChipType = chiptype.substr(0, pos); pos = chiptype.rfind(".", pos - 1); } } } else ReadNextLine(instr, str, MAXLINELENGTH); // rows subStr = strchr(str, '=')+1; m_Header.m_Rows = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // cols subStr=strchr(str,'=')+1; m_Header.m_Cols = atoi(subStr); ReadNextLine(instr, str, 
MAXLINELENGTH); // #ProbeSets subStr=strchr(str,'=')+1; m_Header.m_NumProbeSets = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // max ProbeSet number m_Header.m_NumQCProbeSets = 0; if (m_Header.m_Version > 1) { ReadNextLine(instr, str, MAXLINELENGTH); // #qc ProbeSets subStr=strchr(str,'=')+1; m_Header.m_NumQCProbeSets = atoi(subStr); char strref[400000]; ReadNextLine(instr, strref, 400000); // The reference string. subStr=strchr(strref,'=')+1; m_Header.m_Reference = subStr; } // Stop if just reading the header. if (readHeaderOnly) return true; // Allocate for the probe set names. m_ProbeSetNames.Resize(m_Header.m_NumProbeSets); // Allocate for the QCProbeSets. CCDFQCProbeSetInformation *pQCProbeSet; m_QCProbeSets.resize(m_Header.m_NumQCProbeSets); // Read the QC probe sets for (int iQCProbeSet=0; iQCProbeSetm_QCProbeSetType = atoi( subStr); ReadNextLine(instr, str, MAXLINELENGTH); // #cells subStr=strchr(str,'=')+1; pQCProbeSet->m_NumCells = atoi( subStr); ReadNextLine(instr, str, MAXLINELENGTH); // cell header // Read the QC cells. int xqc; int yqc; int plenqc; CCDFQCProbeInformation *pQCCell; pQCProbeSet->m_Cells.resize(pQCProbeSet->m_NumCells); for (int iqccell=0; iqccellm_NumCells; iqccell++) { pQCCell = &pQCProbeSet->m_Cells[iqccell]; ReadNextLine(instr, str, MAXLINELENGTH); subStr = strchr(str, '=')+1; sscanf(subStr, "%d %d %*s %d", &xqc, &yqc, &plenqc); pQCCell->m_X = xqc; pQCCell->m_Y = yqc; pQCCell->m_PLen = plenqc; pQCCell->m_Background = 0; pQCCell->m_PMProbe = 0; } } // Allocate for the ProbeSets. int iProbeSet=0; CCDFProbeSetInformation *pProbeSet; m_ProbeSets.resize(m_Header.m_NumProbeSets); // Skip until the ProbeSet section is found NextProbeSet: while (1) { ReadNextLine(instr, str, MAXLINELENGTH); if (strlen(str) > 5 && strncmp(str, "[Unit", 5) == 0 && !strchr(str, '_')) break; if (instr.eof()) return true; } bool expectMisMatch = false; // ProbeSet info. 
pProbeSet = &m_ProbeSets[iProbeSet]; pProbeSet->m_Index = iProbeSet; ReadNextLine(instr, str, MAXLINELENGTH); // name (ignore) subStr=strchr(str,'=')+1; m_ProbeSetNames.SetName(iProbeSet, subStr); ReadNextLine(instr, str, MAXLINELENGTH); // direction subStr=strchr(str,'=')+1; pProbeSet->m_Direction = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // # Lists subStr=strchr(str,'=')+1; int NumCellsPerList=0; if (sscanf(subStr, "%d %d", &pProbeSet->m_NumLists, &NumCellsPerList) != 2) NumCellsPerList = 0; pProbeSet->m_NumCellsPerList = NumCellsPerList; ReadNextLine(instr, str, MAXLINELENGTH); // # cells subStr=strchr(str,'=')+1; pProbeSet->m_NumCells = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // ProbeSet number subStr=strchr(str,'=')+1; pProbeSet->m_ProbeSetNumber = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // type subStr=strchr(str,'=')+1; int ival = atoi(subStr); typedef enum { UNKNOWN_TILE, STANDARD_TILE, BLOCK_TILE, GENE_EXPRESSION_TILE, CONTROL_TILE, STANDARD_ALTERNATE_TILE, STANDARD_VARIANT_TILE, UNIVERSAL_TILE, COPY_NUMBER_TILE, GENOTYPE_CONTROL_TILE, EXPRESSION_CONTROL_TILE, MARKER_TILE, MULTICHANNEL_MARKER_TILE } TilingTypes; switch (ival) { case STANDARD_TILE: case STANDARD_ALTERNATE_TILE: case STANDARD_VARIANT_TILE: pProbeSet->m_ProbeSetType = ResequencingProbeSetType; break; case BLOCK_TILE: pProbeSet->m_ProbeSetType = GenotypingProbeSetType; break; case GENE_EXPRESSION_TILE: pProbeSet->m_ProbeSetType = ExpressionProbeSetType; break; case UNIVERSAL_TILE: pProbeSet->m_ProbeSetType = TagProbeSetType; break; case COPY_NUMBER_TILE: pProbeSet->m_ProbeSetType = CopyNumberProbeSetType; break; case GENOTYPE_CONTROL_TILE: pProbeSet->m_ProbeSetType = GenotypeControlProbeSetType; break; case EXPRESSION_CONTROL_TILE: pProbeSet->m_ProbeSetType = ExpressionControlProbeSetType; break; case MARKER_TILE: pProbeSet->m_ProbeSetType = MarkerProbeSetType; break; case MULTICHANNEL_MARKER_TILE: pProbeSet->m_ProbeSetType = 
MultichannelMarkerProbeSetType; break; default: pProbeSet->m_ProbeSetType = UnknownProbeSetType; break; } ReadNextLine(instr, str, MAXLINELENGTH); // # blocks subStr=strchr(str,'=')+1; pProbeSet->m_NumGroups = atoi(subStr); // Determine the number of cells per List if not specified // in the CDF file. if (pProbeSet->m_NumCellsPerList == 0) { if ((pProbeSet->m_ProbeSetType == GenotypingProbeSetType) || (pProbeSet->m_ProbeSetType == ResequencingProbeSetType) || (pProbeSet->m_ProbeSetType == TagProbeSetType) || ((pProbeSet->m_ProbeSetType == UnknownProbeSetType) && ((pProbeSet->m_NumLists != 0) && ((pProbeSet->m_NumCells / pProbeSet->m_NumLists) == 4))) ) { pProbeSet->m_NumCellsPerList = 4; } else if (pProbeSet->m_ProbeSetType == ExpressionProbeSetType || pProbeSet->m_ProbeSetType == CopyNumberProbeSetType || pProbeSet->m_ProbeSetType == GenotypeControlProbeSetType || pProbeSet->m_ProbeSetType == ExpressionControlProbeSetType) { if(pProbeSet->m_NumLists != 0 && pProbeSet->m_NumCells / pProbeSet->m_NumLists < 255) pProbeSet->m_NumCellsPerList = pProbeSet->m_NumCells / pProbeSet->m_NumLists; else pProbeSet->m_NumCellsPerList = 1; } else { pProbeSet->m_NumCellsPerList = 1; } } // Sanity check for relationship of m_NumCellsPerList, m_NumCells and m_NumLists if(pProbeSet->m_NumLists != 0 && pProbeSet->m_NumCells / pProbeSet->m_NumLists < 255 && pProbeSet->m_NumCellsPerList != pProbeSet->m_NumCells / pProbeSet->m_NumLists) { assert(0 && "CCDFFileData::ReadTextFormat(): m_NumCellsPerList != pProbeSet->m_NumCells / pProbeSet->m_NumLists"); } // If this is an expression probe set and we have 2 cells per list set expectMisMatch flag. if(pProbeSet->m_ProbeSetType == ExpressionProbeSetType && pProbeSet->m_NumCellsPerList == 2) expectMisMatch = true; // Get the mutation type if block tile. ignore. if (pProbeSet->m_ProbeSetType == GenotypingProbeSetType && m_Header.m_Version > 1) ReadNextLine(instr, str, MAXLINELENGTH); // Read the blocks. 
CCDFProbeGroupInformation *pBlk; pProbeSet->m_Groups.resize(pProbeSet->m_NumGroups); for (int iGroup=0; iGroupm_NumGroups; iGroup++) { pBlk = &pProbeSet->m_Groups[iGroup]; pBlk->m_GroupIndex = iGroup; pBlk->m_ProbeSetIndex = iProbeSet; ReadNextLine(instr, str, MAXLINELENGTH); // section name - ignore ReadNextLine(instr, str, MAXLINELENGTH); // name subStr=strchr(str,'=')+1; pBlk->m_Name = subStr; if (pProbeSet->m_ProbeSetType == ExpressionProbeSetType) m_ProbeSetNames.SetName(iProbeSet, subStr); ReadNextLine(instr, str, MAXLINELENGTH); // block number - ignore. if (pProbeSet->m_ProbeSetType == MarkerProbeSetType && m_Header.m_Version > 3) { ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; pBlk->m_WobbleSituation = (uint16_t) strtoul(subStr, &buffer, 10); ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; pBlk->m_AlleleCode = (uint16_t) strtoul(subStr, &buffer, 10); } if (pProbeSet->m_ProbeSetType == MultichannelMarkerProbeSetType && m_Header.m_Version > 4) { ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; pBlk->m_WobbleSituation = (uint16_t) strtoul(subStr, &buffer, 10); ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; pBlk->m_AlleleCode = (uint16_t) strtoul(subStr, &buffer, 10); ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; pBlk->m_Channel = (uint8_t) strtoul(subStr, &buffer, 10); ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; pBlk->m_RepType = (uint8_t) strtoul(subStr, &buffer, 10); } ReadNextLine(instr, str, MAXLINELENGTH); // number of Lists. subStr=strchr(str,'=')+1; pBlk->m_NumLists = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // number of cells subStr=strchr(str,'=')+1; pBlk->m_NumCells = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // start position. 
subStr=strchr(str,'=')+1; pBlk->m_Start = atoi(subStr); ReadNextLine(instr, str, MAXLINELENGTH); // stop position subStr=strchr(str,'=')+1; pBlk->m_Stop = atoi(subStr); pBlk->m_NumCellsPerList = pProbeSet->m_NumCellsPerList; if ((pProbeSet->m_ProbeSetType == GenotypingProbeSetType && m_Header.m_Version > 2) || ((pProbeSet->m_ProbeSetType == MarkerProbeSetType || pProbeSet->m_ProbeSetType == CopyNumberProbeSetType) && m_Header.m_Version > 3) || (pProbeSet->m_ProbeSetType == MultichannelMarkerProbeSetType && m_Header.m_Version > 4)) { ReadNextLine(instr, str, MAXLINELENGTH); subStr=strchr(str,'=')+1; pBlk->m_Direction = atoi(subStr); } else pBlk->m_Direction = pProbeSet->m_Direction; // Read the cells. ReadNextLine(instr, str, MAXLINELENGTH); // header CCDFProbeInformation cell; pBlk->m_Cells.resize(pBlk->m_NumCells); char unusedstr[64]; int unusedint; unsigned int cellIndex; int x,y; for (int iCell=0; iCellm_NumCells; iCell++) { ReadNextLine(instr, str, MAXLINELENGTH); subStr = strchr(str, '=')+1; int scanCount = 0; if (m_Header.m_Version > 3) { scanCount = sscanf(subStr, "%d %d %s %s %s %d %hu %d %*c %c %c %d %d %hu", &x, &y, unusedstr, unusedstr, unusedstr, &cell.m_Expos, &cell.m_ProbeLength, &unusedint, &cell.m_PBase, &cell.m_TBase, &cell.m_ListIndex, &unusedint, &cell.m_ProbeGrouping); if(scanCount != 13) { m_strError = "Didn't get 13 entries in scan."; return false; } } else { scanCount = sscanf(subStr, "%d %d %s %s %s %d %d %*c %c %c %d", &x, &y, unusedstr, unusedstr, unusedstr, &cell.m_Expos, &unusedint, &cell.m_PBase, &cell.m_TBase, &cell.m_ListIndex); if(scanCount != 10) { m_strError = "Didn't get 10 entries in scan."; return false; } } cell.m_X = x; cell.m_Y = y; if (pProbeSet->m_ProbeSetType == ExpressionProbeSetType) { cellIndex = (iCell / pProbeSet->m_NumCellsPerList) * pProbeSet->m_NumCellsPerList; // If we are expecting pairs of PM/MM then we want the order // in m_Cells to be PM first and MM second. 
if (expectMisMatch && cell.m_PBase == cell.m_TBase) ++cellIndex; } else { cellIndex = (iCell / pProbeSet->m_NumCellsPerList) * pProbeSet->m_NumCellsPerList; cellIndex += (pProbeSet->m_NumCellsPerList - (iCell % pProbeSet->m_NumCellsPerList) - 1); } if(cellIndex >= pBlk->m_Cells.size()) { assert(0 && "CCDFFileData::ReadTextFormat(): cellIndex cannot be larger that pBlk->m_Cells.size()" ); } pBlk->m_Cells[cellIndex] = cell; if (iCell==0) pBlk->m_Start = cell.m_ListIndex; else if (iCell == pBlk->m_NumCells-1) pBlk->m_Stop = cell.m_ListIndex; } } ++iProbeSet; goto NextProbeSet; } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/CDFFileData.h0000644000175200017520000007346214516003651021122 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #if !defined(AFX_CDFFILEDATA_H__BDEC0B48_58F6_480C_A988_05355DCF0BA4__INCLUDED_) #define AFX_CDFFILEDATA_H__BDEC0B48_58F6_480C_A988_05355DCF0BA4__INCLUDED_ /*! \file CDFFileData.h This file provides CDF file reading and storage capabilities. 
*/ ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable: 4786) // identifier was truncated in the debug information #include #endif ////////////////////////////////////////////////////////////////////// // #include "portability/affy-base-types.h" // #include #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxcdf { ////////////////////////////////////////////////////////////////////// /*! The maximum storage length for a probe set name in the CDF file. */ #define MAX_PROBE_SET_NAME_LENGTH 64 /*! The storage length for integrity md5 (hex) in the CDF file. */ #define INTEGRITY_MD5_LENGTH 32 //////////////////////////////////////////////////////////////////// /*! Storage for the header in a CDF file. */ class CCDFFileHeader { protected: /*! The magic number in an XDA file */ int m_Magic; /*! The version number */ int m_Version; /*! The GUID */ std::string m_GUID; /*! The integrity md5 */ std::string m_IntegrityMd5; /*! The chip type - array type name without version (only apply to XDA version 4 or above */ std::string m_ChipType; /*! The array of chip types - all array type names with and without versions (only apply to XDA version 4 or above */ std::vector m_ChipTypes; /*! The number of probe sets defined in the file */ int m_NumProbeSets; /*! The number of QC probe sets in the file */ int m_NumQCProbeSets; /*! The reference sequence (used for resequencing arrays only) */ std::string m_Reference; /*! The number of feature columns in the array */ unsigned short m_Cols; /*! The number of feature rows in the array */ unsigned short m_Rows; /*! Friend to the parent class */ friend class CCDFFileData; public: /*! Get CDF Format Version * @return CDF Format Version */ int GetFormatVersion() const { return m_Version; } /*! Get GUID * @return GUID */ std::string GetGUID() const { return m_GUID; } /*! 
Get integrity md5 * @return The integrity md5 */ std::string GetIntegrityMd5() const { return m_IntegrityMd5; } /*! Get chip type - array type name without version * @return The chip type. */ std::string GetChipType() const { return m_ChipType; } /*! Get chip types - array type names with and without version * @return The chip types. */ std::vector GetChipTypes() const { return m_ChipTypes; } /*! Gets the number of feature columns in the array. * @return The number of columns. */ int GetCols() const { return m_Cols; } /*! Gets the number of feature rows in the array. * @return The number of rows. */ int GetRows() const { return m_Rows; } /*! Gets the number of probe sets. * @return The number of probe sets. */ int GetNumProbeSets() const { return m_NumProbeSets; } /*! Gets the number of QC probe sets. * @return The number of QC probe sets. */ int GetNumQCProbeSets() const { return m_NumQCProbeSets; } /*! Gets the reference sequence (for resequencing arrays only). * @return The reference sequence. */ std::string &GetReference() { return m_Reference; } /*! Constructor */ CCDFFileHeader(); }; //////////////////////////////////////////////////////////////////// /*! Defines the type of probe sets stored in the CDF file */ enum GeneChipProbeSetType { /*! Unknown probe set */ UnknownProbeSetType, /*! Expression probe set */ ExpressionProbeSetType, /*! Genotyping probe set */ GenotypingProbeSetType, /*! Resequencing probe set */ ResequencingProbeSetType, /*! Tag probe set */ TagProbeSetType, /*! Copy number probe set. */ CopyNumberProbeSetType, /*! Genotype control probe set. */ GenotypeControlProbeSetType, /*! Expression control probe set. */ ExpressionControlProbeSetType, /*! Polymorphic marker probe set. */ MarkerProbeSetType, /*! Multichannel marker probe set. */ MultichannelMarkerProbeSetType }; //////////////////////////////////////////////////////////////////// /*! Defines the direction of a probe set or group */ enum DirectionType { /*! 
No direction specified */ NoDirection, /*! Sense * All probes in the probe set (or probe group) have the sense direction */ SenseDirection, /*! Anti sense * All probes in the probe set (or probe group) have the antisense direction */ AntiSenseDirection, /* !Either * Some probes in the probe set (or probe group) have antisense direction, * others have sense direction */ EitherDirection }; //////////////////////////////////////////////////////////////////// /*! Defines the type of probe replication of a group */ enum ReplicationType { /*! Unspecified replication type */ UnknownRepType, /*! Different * All probes in the probe group have different sequences */ DifferentRepType, /*! Mixed * Some probes in the probe group have identical sequences */ MixedRepType, /* Identical * All probes in the probe group have identical sequences */ IdenticalRepType }; //////////////////////////////////////////////////////////////////// /*! This class provides storage for an individual probe in a CDF file */ class CCDFProbeInformation { protected: /*! The index of the probes probe pair or quartet (probe list). Also known as the atom position. */ int m_ListIndex; /*! The expos value in the CDF file, this can either be a zero based index equal to the * list index or the exon position. */ int m_Expos; /*! The X coordinate */ unsigned short m_X; /*! The Y coordinate */ unsigned short m_Y; /*! The probes base at the interrogation position */ char m_PBase; /*! The targets base at the interrogation position */ char m_TBase; /*! The probe length */ unsigned short m_ProbeLength; /*! The probe grouping */ unsigned short m_ProbeGrouping; /*! Friend to the parent */ friend class CCDFProbeGroupInformation; /*! Friend to the probe set */ friend class CCDFProbeSetInformation; /*! Friend to the top level class */ friend class CCDFFileData; public: /*! Returns the list index. * @return The list index. * This is an internal index used by Affymetrix chip design. 
*/ int GetListIndex() const { return m_ListIndex; } /*! Returns the expos value. * @return The expos value. * This is the position of the probe sequence in a longer sequence. (Note * in some arrays this value may instead correspond to the GetListIndex value). * In expression arrays the longer sequence can be the sequence from which * the probe is selected. In genotyping arrays the longer sequence can be * the sequence containing a polymorphic SNP. More than one probe can have * the same expos value, e.g., in arrays containing perfect match/mismatch * pairs, both probes in the pair will have the same expos value. */ int GetExpos() const { return m_Expos; } /*! Returns the X coordinate of the probe in the physical array. * @return The X coordinate. */ int GetX() const { return m_X; } /*! Returns the Y coordinate of the probe in the physical array. * @return The Y coordinate. */ int GetY() const { return m_Y; } /*! Returns the probes base at the interrogation position. * @return The probes base at the interrogation position. */ char GetPBase() const { return m_PBase; } /*! Returns the targets base at the interrogation position. * @return The targets base at the interrogation position. */ char GetTBase() const { return m_TBase; } /*! Returns the length of the probe. * @return The probe length. * This is not applicable to XDA CDF file version 1. */ unsigned short GetProbeLength() const { return m_ProbeLength; } /*! Returns the grouping of the probe in the physical array. * @return The probe grouping. * This is not applicable to XDA CDF file version 1. */ unsigned short GetProbeGrouping() const { return m_ProbeGrouping; } /*! Constructor */ CCDFProbeInformation(); }; /*! The size of the probe object as stored in the XDA CDF file version 1. */ #define PROBE_SIZE (4+4+2+2+1+1) /*! The size of the probe object as stored in the XDA CDF file version 2, 3 and 4. */ #define PROBE_SIZE_V2 (4+4+2+2+1+1+2+2) //////////////////////////////////////////////////////////////////// /*! 
This class provides storage for a group of probes, also known as a block. */ class CCDFProbeGroupInformation { protected: /*! The number of probe pairs or probe quartets in the group. */ int m_NumLists; /*! The number of probes in the group. */ int m_NumCells; /*! The first probes list index value. */ int m_Start; /*! The last probes list index value. */ int m_Stop; /*! The probe set index associated with the group. */ int m_ProbeSetIndex; /*! The group index. */ int m_GroupIndex; /*! The name of the group. */ std::string m_Name; /*! The wobble situation. */ unsigned short m_WobbleSituation; /*! The allele code. */ unsigned short m_AlleleCode; /*! The channel. */ unsigned char m_Channel; /*! The probe replication type. */ unsigned char m_RepType; /*! The number of cells per list (1 or 2 for expression and genotyping, 4 for resequencing). */ unsigned char m_NumCellsPerList; /*! The direction of the target that the probes are interrogating. */ unsigned char m_Direction; /*! The probes in the group */ std::vector m_Cells; /*! A pointer to the probes. This is used when memory mapping is used. */ std::vector *m_pCells; /*! Friend to the parent class. */ friend class CCDFProbeSetInformation; /*! Friend to the top level class. */ friend class CCDFFileData; /*! Copies the data in the input object to the member variables. * @param orig The group to copy. */ void MakeShallowCopy(CCDFProbeGroupInformation &orig); public: /*! Gets the groups direction. * @return The groups direction. * See the DirectionType enumeration for details */ DirectionType GetDirection() const { return (DirectionType)m_Direction; } /*! Gets the number of lists (atoms) in the group. * @return The number of lists (atoms) in the group. * In expression and genotyping arrays probes may be arranged in singletons or paired * with a mismatch probe. In resequencing arrays probes may be arranged in quartes. 
* Note that multiple arrangements may be present on the same * array, but not within a probe group (or probe set). * Each such singleton or pair or quartet is a "list" or "atom." */ int GetNumLists() const { return m_NumLists; } /*! Gets the number of probes in the group. * @return The number of probes in the group. */ int GetNumCells() const { return m_NumCells; } /*! Gets the number of probes per list. * @return The number of probes per list. * Always constant within a probe group or probe set. */ int GetNumCellsPerList() const { return (int)m_NumCellsPerList; } /*! Gets the start list index value. * @return The start list index value. * This is the minimum value returned by GetListIndex() * This is an internal index used by Affymetrix chip design. */ int GetStart() const { return m_Start; } /*! Gets the stop list index value. * @return The stop list index value. * This is the maximum value returned by GetListIndex() * This is an internal index used by Affymetrix chip design. */ int GetStop() const { return m_Stop; } /*! Gets the group name. * @return The group name. */ std::string GetName() { return m_Name; } /*! Gets the wobble situation. * @return The wobble situation. * This is not applicable to XDA CDF file version 1. */ unsigned short GetWobbleSituation() const { return m_WobbleSituation; } /*! Gets the allele code. * @return The allele code. * This is not applicable to XDA CDF file version 1. */ unsigned short GetAlleleCode() const { return m_AlleleCode; } /*! Gets the channel. * @return The channel. * This is not applicable to XDA CDF file version 1 and 2. */ unsigned char GetChannel() const { return m_Channel; } /*! Gets the probe replication type. * @return The probe replication type. * See the ReplicationType enumeration for details * This is not applicable to XDA CDF file version 1 and 2. */ ReplicationType GetRepType() const { return (ReplicationType) m_RepType; } /*! Retrieves the probe object given a zero-based index. 
* @param cell_index zero-based index in the probe group to the probe of interest. * @param info The returned probe data. * Note: this has nothing to do with list index returned by GetListIndex */ void GetCell(int cell_index, CCDFProbeInformation & info); /*! Constructor */ CCDFProbeGroupInformation(); /*! Destructor */ ~CCDFProbeGroupInformation(); }; /*! This is the size of the object in a binary CDF file version 1. */ #define PROBE_GROUP_SIZE (MAX_PROBE_SET_NAME_LENGTH+4+4+4+4+1+1) /*! This is the size of the object in a binary CDF file version 2. */ #define PROBE_GROUP_SIZE_V2 (MAX_PROBE_SET_NAME_LENGTH+4+4+4+4+1+1+2+2) /*! This is the size of the object in a binary CDF file version 3 and 4. */ #define PROBE_GROUP_SIZE_V3 (MAX_PROBE_SET_NAME_LENGTH+4+4+4+4+1+1+2+2+1+1) //////////////////////////////////////////////////////////////////// /*! This class provides storage for a probe set. */ class CCDFProbeSetInformation { protected: /*! The number of lists (atoms) in the probe set. */ int m_NumLists; /*! The number of groups (blocks) in the probe set. */ int m_NumGroups; /*! The number of probes in the set. */ int m_NumCells; /*! An index for the probe set.*/ int m_Index; /*! An arbitrary number assigned to the probe set. */ int m_ProbeSetNumber; /*! The type of probe set. */ unsigned short m_ProbeSetType; /*! The direction of the target that the probes are interrogating. */ unsigned char m_Direction; /*! The number of probes per list. */ unsigned char m_NumCellsPerList; /*! The groups in the set. */ std::vector m_Groups; /*! A pointer to the groups. This is used when memory mapping is used. */ std::vector *m_pGroups; /*! Friend to the top level class. */ friend class CCDFFileData; /*! Copies the data in the input object to the member variables. * @param orig The probe set to copy. */ void MakeShallowCopy(CCDFProbeSetInformation &orig); public: /*! Gets the probe set type. * @return The probe set type. 
* see GeneChipProbeSetType for details */ GeneChipProbeSetType GetProbeSetType() const { return (GeneChipProbeSetType)m_ProbeSetType; } /*! Gets the probe sets direction. * @return The probe sets direction. * See the DirectionType enumeration for details */ DirectionType GetDirection() const { return (DirectionType)m_Direction; } /*! Gets the number of lists (atoms) in the set. * @return The number of lists (atoms) in the set. * See GetNumLists for probe group for details */ int GetNumLists() const { return m_NumLists; } /*! The number of groups in the set. * @return The number of groups in the set. * In genotyping arrays it is convenient to group organize probes that interrogate * a polymorphic SNP into subsets that interrogate a particular allele and have * the same direction (i.e., sense or antisense). Each grouping is a "probe group." * Affymetrix also uses the term "block" for a "probe group." */ int GetNumGroups() const { return m_NumGroups; } /*! The number of probes in the set. * @return The number of probes in the set. */ int GetNumCells() const { return m_NumCells; } /*! Gets the number of probes per list. * @return The number of probes per list. * Always constant within a probe group or probe set. */ int GetNumCellsPerList() const { return (int)m_NumCellsPerList; } /*! Gets the probe set number. * @return The probe set number. * This is an internal value used by Affymetrix chip design. */ int GetProbeSetNumber() const { return m_ProbeSetNumber; } /*! Gets a group object. * @param index The index to the group of interest. * @param info The returned group data. */ void GetGroupInformation(int index, CCDFProbeGroupInformation & info); /*! Constructor */ CCDFProbeSetInformation(); /*! Destructor */ ~CCDFProbeSetInformation(); }; /*! This is the size of the object in a binary CDF file. */ #define PROBE_SET_SIZE (4+4+4+4+2+1+1) //////////////////////////////////////////////////////////////////// /*! This class provides storage for the list of probe set names. 
*/ class CCDFProbeSetNames { public: /*! Friend to the top level class. */ friend class CCDFFileData; /*! Constructor */ CCDFProbeSetNames(); /*! Destructor */ ~CCDFProbeSetNames(); /*! Deallocates memory used by the class. */ void Clear(); protected: /*! Array of probe set names, used if not memory mapping. */ std::vector m_ProbeSetNames; /*! Resizes the probe set name array. */ void Resize(int size); /*! Stores the name of the probe set in the internal array. */ void SetName(int index, std::string name); public: /*! Gets the probe set name. * @param index The zero-based index to the probe set name of interest. * @return The name of the probe set. */ std::string GetName(int index) const; }; //////////////////////////////////////////////////////////////////// /*! This class provides storage for QC probes */ class CCDFQCProbeInformation { public: /*! Constructor */ CCDFQCProbeInformation(); protected: /*! The X coordinate of the probe */ unsigned short m_X; /*! The Y coordinate of the probe */ unsigned short m_Y; /*! The probe length. This value may be 1 for non-synthesized features */ unsigned char m_PLen; /*! Flag indicating if the probe is a perfect match probe. */ unsigned char m_PMProbe; /*! Flag indicating if the probe is used for background calculations (blank feature). */ unsigned char m_Background; /*! Friend to the parent class */ friend class CCDFQCProbeSetInformation; /*! Friend to the top level CDF class */ friend class CCDFFileData; public: /*! Gets the X coordinate of the probe. * @return The X coordinate. */ int GetX() const { return m_X; } /*! Gets the Y cooridnate of the probe. * @return The Y coordinate. */ int GetY() const { return m_Y; } /*! Gets the probe length. * @return The probe length. This value may be 1 for non-synthesized features. */ int GetPLen() const { return m_PLen; } /*! Gets the flag indicating if the probe is a perfect match probe. 
* @return The flag indicating if the probe is a perfect match probe */ bool IsPerfectMatchProbe() const { return (m_PMProbe == 1 ? true : false); } /*! Gets a flag indicating if the probe is used for background calculations (blank feature). * @return Flag indicating if the probe is used for background calculations (blank feature). */ bool IsBackgroundProbe() const { return (m_Background == 1 ? true : false); } }; /*! This is the size of the object in a binary CDF file. */ #define QC_PROBE_SIZE (2+2+1+1+1) //////////////////////////////////////////////////////////////////// /*! The type of QC probe set */ enum GeneChipQCProbeSetType { /*! Unknown probe set */ UnknownQCProbeSetType, /*! Probes used for the checker board patterns for antisense arrays. */ CheckerboardNegativeQCProbeSetType, /*! Probes used for the checker board patterns for sense arrays. */ CheckerboardPositiveQCProbeSetType, /*! Hybridization control probes for antisense arrays. */ HybNegativeQCProbeSetType, /*! Hybridization control probes for sense arrays. */ HybPositiveQCProbeSetType, /*! Probes used for text patterns for antisense arrays. */ TextFeaturesNegativeQCProbeSetType, /*! Probes used for text patterns for sense arrays. */ TextFeaturesPositiveQCProbeSetType, /*! Central probes for antisense arrays. */ CentralNegativeQCProbeSetType, /*! Central probes for sense arrays. */ CentralPositiveQCProbeSetType, /*! Gene expression control probes for antisense arrays. */ GeneExpNegativeQCProbeSetType, /*! Gene expression control probes for sense arrays. */ GeneExpPositiveQCProbeSetType, /*! Cycle fidelity probes for antisense arrays. */ CycleFidelityNegativeQCProbeSetType, /*! Cycle fidelity probes for sense arrays. */ CycleFidelityPositiveQCProbeSetType, /*! Central cross probes for antisense arrays. */ CentralCrossNegativeQCProbeSetType, /*! Central cross probes for sense arrays. */ CentralCrossPositiveQCProbeSetType, /*! X-hyb control probes for antisense arrays. 
*/ CrossHybNegativeQCProbeSetType, /*! X-hyb control probes for sense arrays. */ CrossHybPositiveQCProbeSetType, /*! Space normalization probes for antisense arrays. */ SpatialNormalizationNegativeQCProbeSetType, /*! Space normalization probes for sense arrays. */ SpatialNormalizationPositiveQCProbeSetType }; //////////////////////////////////////////////////////////////////// /*! This class provides storage for the probes in a QC probe set. */ class CCDFQCProbeSetInformation { protected: /*! The number of probes in the set. */ int m_NumCells; /*! The type of QC probes. */ unsigned short m_QCProbeSetType; /*! The array of probes. */ std::vector m_Cells; /*! The array of probes. */ std::vector *m_pCells; /*! Friend to the top level CDF class. */ friend class CCDFFileData; /*! Copies the data in the input object to the member variables. * @param orig The probe set to copy. */ void MakeShallowCopy(CCDFQCProbeSetInformation &orig); public: /*! Gets the probe set type. * @return The probe set type. */ GeneChipQCProbeSetType GetQCProbeSetType() const { return (GeneChipQCProbeSetType)m_QCProbeSetType; } /*! Gets the number of probes in the set. * @return The number of probes in the set. */ int GetNumCells() const { return m_NumCells; } /*! Gets the information about a single probe in the set. * @param index The zero-based index to the probe of interest. * @param info The information about the probe. */ void GetProbeInformation(int index, CCDFQCProbeInformation & info); /*! Constructor */ CCDFQCProbeSetInformation(); /*! Destructor */ ~CCDFQCProbeSetInformation(); }; /*! This is the size of the QC probe set object in a binary CDF file. */ #define QC_PROBE_SET_SIZE (4+2) //////////////////////////////////////////////////////////////////// /*! This class provides storage and reading capabilities for a CDF file. */ class CCDFFileData { protected: /*! The position of the probe set names in the XDA file. */ std::ios::pos_type probeSetNamePos; /*! 
The position of the QC probe set index array in the XDA file. */ std::ios::pos_type qcSetIndexPos; /*! The position of the probe set index array in the XDA file. */ std::ios::pos_type probeSetIndexPos; /*! What was the last probesetindex read? * This state allows us to handle sequential reads quickly. (no seeking.) */ uint32_t m_probeSetIndex_last; /*! 1 if like m_probeSetIndex_last is valid. * The seekg method clears this. */ uint32_t m_probeSetIndex_last_valid; /*! Use this seekg method when seeking with the iteratorReader. * It updates the state needed for sequenial reads. */ void seekg(uint32_t pos, const std::ios_base::seekdir& dir) { // invalidate the last probe state m_probeSetIndex_last_valid=0; // m_probeSetIndex_last=0; // we dont need to clear this as we just cleared "_valid" // and do the seek iteratorReader.seekg(pos,dir); } /*! The file stream for the probe set information iterator. */ std::ifstream iteratorReader; /*! Flag to indicate that only the header part of the file is to be read. */ bool readHeaderOnly; /*! The file header object. */ CCDFFileHeader m_Header; /*! The list of probe set names. */ CCDFProbeSetNames m_ProbeSetNames; /*! An array of probe sets. */ std::vector m_ProbeSets; /*! An array of QC probe sets. */ std::vector m_QCProbeSets; /*! The CDF file name (full path). */ std::string m_FileName; /*! A string to hold an error message upon read failures. */ std::string m_strError; /*! Opens the file for reading. * @return True if successful. */ bool Open(); /*! Reads a text format CDF file. * @return True if successful. */ bool ReadTextFormat(); /*! Reads an XDA format CDF file. * @return True if successful. */ bool ReadXDAFormat(); /*! Reads the header of an XDA format CDF file. * @return True if successful. */ bool ReadXDAHeader(); public: /*! Sets the name of the file. * @param name The full path of the CDF file. */ void SetFileName(const char *name) { m_FileName = name; } /*! Gets the name of the file. 
* @return The full path of the CDF file. */ std::string GetFileName() const { return m_FileName; } /*! Gets the header object. * @return The CDF file header object. */ CCDFFileHeader &GetHeader() { return m_Header; } /*! Get GUID * @return GUID */ std::string GetGUID(); /*! Get integrity md5 * @return The integrity md5 */ std::string GetIntegrityMd5(); /*! Gets the error string. * @return A string describing the last read error. */ std::string GetError() const { return m_strError; } /*! Gets the name of a probe set. * @param index The zero-based index to the probe set name of interest. * @return The probe set name. */ std::string GetProbeSetName(int index); /*! Gets the chip type (probe array type) of the CDF file. * @return The chip type. This is just the name (without extension) of the CDF file. */ std::string GetChipType(); /*! Gets the chip types (probe array type) of the CDF file. Allow substrings deliminated by '.' * @return vector of chip types */ std::vector GetChipTypes(); /*! Reads the entire file. * @return True if successful. */ bool Read(); /*! Reads the header of the file only. * @return True if successful. */ bool ReadHeader(); /*! Checks if the file exists. * @return True if the file exists. */ bool Exists(); /*! Deallocates memory and closes any file handles. */ void Close(); /*! Determines if a CDF file is of the XDA (binary) format. * @return True if XDA format. */ bool IsXDACompatibleFile(); /*! Gets the probe set type for non-qc probe sets. * @param index The zero-based index to the probe set of interest. * @return The type of probe set. */ GeneChipProbeSetType GetProbeSetType(int index); /*! Gets the probe set information. * @param index The zero-based index to the probe set of interest. * @param info The probe set information. * @return The probe set information. */ void GetProbeSetInformation(int index, CCDFProbeSetInformation & info); /*! Gets the QC probe set information by index. 
* @param index The zero-based index to the QC probe set of interest. * @param info The QC probe set information. * @return The QC probe set information. */ void GetQCProbeSetInformation(int index, CCDFQCProbeSetInformation & info); /*! Gets the QC probe set information by type. * @param qcType The type of QC probe set to retrieve. * @param info The QC probe set information. * @return The QC probe set information. */ void GetQCProbeSetInformation(GeneChipQCProbeSetType qcType, CCDFQCProbeSetInformation & info); /*! Constructor */ CCDFFileData(); /*! Destructor */ ~CCDFFileData(); }; //////////////////////////////////////////////////////////////////// } // namespace //////////////////////////////////////////////////////////////////// #endif // !defined(AFX_CDFFILEDATA_H__BDEC0B48_58F6_480C_A988_05355DCF0BA4__INCLUDED_) affxparser/src/fusion/file/CELFileData.cpp0000644000175200017520000030663214516003651021462 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "file/CELFileData.h" // #include "file/FileIO.h" // #include #include #include #include #include #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #ifdef HAVE_SNPRINTF // If not using visual c++'s _snprintf include snprintf. extern "C" { #include "snprintf.h" } #else // otherwise use _snprintf where normally use snprintf. #define snprintf _snprintf #endif // HAVE_SNPRINTF #endif // _MSC_VER using namespace affxcel; ////////////////////////////////////////////////////////////////////// /// Delimiter character in DAT header #define DELIMCHAR 0x14 /// Minimum number of characters in cell data row in text format #define MIN_CELLSTR 2 /// Xda format identifier #define CELL_FILE_MAGIC_NUMBER 0x00000040 // 64 /// Version number for xda format #define CELL_FILE_VERSION_NUMBER 0x00000004 // 4 /// Trascriptome bcel format identifier #define BCEL_HEADER_BYTES "BCEL\r\n\032\n" /// Size of trascriptome bcel format identifier #define BCEL_HEADER_LEN 8 /// Header information size in trascriptome bcel format #define STRUCT_SIZE_BCEL_HEADER_INFO 66 /// Feature data size in trascriptome bcel format #define STRUCT_SIZE_FEATURE_DATA 5 /// Masked cells and outlier data size in trascriptome bcel format #define STRUCT_SIZE_XY_PAIR 8 /// Size of anonymous 4-byte identifier in trascriptome bcel format #define BCEL_CHUNK_LEN_SIZE 4 /// Size of section name identifier in trascriptome bcel format #define BCEL_CHUNK_NAME_SIZE 4 /// Size of section separator in trascriptome bcel format #define BCEL_CHUNK_CRC_SIZE 4 /// Header section identifier in trascriptome bcel format #define BCEL_CHUNK_HEAD "HEAD" /// 
/// DAT header section identifier in transcriptome bcel format
#define BCEL_CHUNK_DTHD "DTHD"
/// Algorithm section identifier in transcriptome bcel format
#define BCEL_CHUNK_ALGM "ALGM"
/// Algorithm parameter section identifier in transcriptome bcel format
#define BCEL_CHUNK_ALPR "ALPR"
/// Intensity section identifier in transcriptome bcel format
#define BCEL_CHUNK_INTY "INTY"
/// Masked cell section identifier in transcriptome bcel format
#define BCEL_CHUNK_MASK "MASK"
/// Outlier section identifier in transcriptome bcel format
#define BCEL_CHUNK_OUTL "OUTL"
/// End of file identifier in transcriptome bcel format
#define BCEL_CHUNK_TAIL "TAIL"

/// Compact cel format identifier
/// NOTE(review): "\128" is lexed as the octal escape "\12" followed by the
/// character '8' (8 is not an octal digit), so this literal is 9 characters
/// long while CCEL_HEADER_LEN is 8 - confirm against the file writer before
/// touching it.
#define CCEL_HEADER_BYTES "CCEL\r\n\128\n"
/// Unsupported version of compact cel format identifier
/// ("\064" is a single octal escape, i.e. the character '4').
#define OLD_CCEL_HEADER_BYTES "CCEL\r\n\064\n"
/// Size of compact cel format identifier
#define CCEL_HEADER_LEN 8

#ifndef WIN32
/// Line separator for unix/linux
#define LINE_SEPARATOR "\n"
#else
/// Line separator for Windows
#define LINE_SEPARATOR "\r\n"
#endif

#ifndef PAGE_SIZE
/// Page size used for memory mapping in non Windows environment
#define PAGE_SIZE (getpagesize())
#endif
#ifndef PAGE_MASK
/// Page mask used for memory mapping in non Windows environment
#define PAGE_MASK ~(PAGE_SIZE-1)
#endif
#ifndef PAGE_TRUNC
/// Page truncation pointer used for memory mapping in non Windows environment
#define PAGE_TRUNC(ptr) (ptr&(PAGE_MASK))
#endif

///////////////////////////////////////////////////////////////////////////////
///  public constructor  CCELFileHeaderData
///  \brief Default constructor
///
///  Zeroes every counter and grid corner and empties every string.
///  Note that the magic number starts at 0 here, whereas Clear() resets it
///  to CELL_FILE_MAGIC_NUMBER.
///
///  @return void
///////////////////////////////////////////////////////////////////////////////
CCELFileHeaderData::CCELFileHeaderData()
{
    m_Magic = 0;
    m_Version = CELL_FILE_VERSION_NUMBER;
    m_nCols = 0;
    m_nRows = 0;
    m_nCells = 0;
    m_Margin = 0;
    m_nOutliers = 0;
    m_nMasked = 0;
    m_Header = "";
    m_Alg = "";
    m_Params = "";
    m_DatHeader = "";
    m_ChipType = "";
    m_CellGrid.upperleft.x = 0;
    m_CellGrid.upperleft.y = 0;
    m_CellGrid.upperright.x = 0;
    m_CellGrid.upperright.y = 0;
    m_CellGrid.lowerleft.x = 0;
    m_CellGrid.lowerleft.y = 0;
    m_CellGrid.lowerright.x = 0;
    m_CellGrid.lowerright.y = 0;
}

///////////////////////////////////////////////////////////////////////////////
///  public destructor  ~CCELFileHeaderData
///  \brief Destructor; delegates to Clear().
///
///  @return void
///////////////////////////////////////////////////////////////////////////////
CCELFileHeaderData::~CCELFileHeaderData()
{
    Clear();
}

///////////////////////////////////////////////////////////////////////////////
///  public  Clear
///  \brief Clear and initialize content of member variables
///
///  Empties both algorithm-parameter maps, resets strings, counters and grid
///  corners, and restores the XDA magic/version numbers.
///
///  @return void
///////////////////////////////////////////////////////////////////////////////
void CCELFileHeaderData::Clear()
{
    m_nRows = 0;
    m_nCols = 0;
    m_nCells = 0;
    if (m_ParameterIndices.size() > 0)
        m_ParameterIndices.clear();
    if (m_Parameters.size() > 0)
        m_Parameters.clear();
    m_Header = "";
    m_Alg = "";
    m_Params = "";
    m_DatHeader = "";
    m_ChipType = "";
    m_CellGrid.upperleft.x = 0;
    m_CellGrid.upperleft.y = 0;
    m_CellGrid.upperright.x = 0;
    m_CellGrid.upperright.y = 0;
    m_CellGrid.lowerleft.x = 0;
    m_CellGrid.lowerleft.y = 0;
    m_CellGrid.lowerright.x = 0;
    m_CellGrid.lowerright.y = 0;
    m_Margin = 0;
    m_nOutliers = 0;
    m_nMasked = 0;
    m_Magic = CELL_FILE_MAGIC_NUMBER;
    m_Version = CELL_FILE_VERSION_NUMBER;
}

///////////////////////////////////////////////////////////////////////////////
///  public  GetHeader
///  \brief Concatenate header data into a string and return it
///
///  Refreshes the DAT header via SetDatHeader(), formats the fixed fields,
///  then appends the "AlgorithmParameters=" line. The result is also stored
///  in m_Header.
///
///  @return std::string	Header string
///////////////////////////////////////////////////////////////////////////////
std::string CCELFileHeaderData::GetHeader()
{
    SetDatHeader();
    // snprintf bounds the write to the 2000-byte buffer, so an over-long
    // DatHeader/Algorithm value cannot overflow it.
    char paramString[2000];
    snprintf(paramString,sizeof(paramString),
        "Cols=%d\nRows=%d\nTotalX=%d\nTotalY=%d\nOffsetX=%d\nOffsetY=%d\n"
        "GridCornerUL=%d %d\nGridCornerUR=%d %d\nGridCornerLR=%d %d\nGridCornerLL=%d %d\n"
        "Axis-invertX=%d\nAxisInvertY=%d\nswapXY=%d\nDatHeader=%s\nAlgorithm=%s\n",
        m_nCols,
        m_nRows,
        m_nCols,
        m_nRows,
        0,0,
        m_CellGrid.upperleft.x,
        m_CellGrid.upperleft.y,
        m_CellGrid.upperright.x,
        m_CellGrid.upperright.y,
        m_CellGrid.lowerright.x,
        m_CellGrid.lowerright.y,
        m_CellGrid.lowerleft.x,
        m_CellGrid.lowerleft.y,
        0,0,0,
        m_DatHeader.c_str(),
        m_Alg.c_str());
    m_Header = paramString;
    m_Header += "AlgorithmParameters=";
    m_Header += GetAlgorithmParameters();
    m_Header += "\n";
    return m_Header;
}

///////////////////////////////////////////////////////////////////////////////
///  public overloaded  SetDatHeader
///  \brief Set default DAT header
///
///  Does nothing when no chip type is set. With a chip type and an empty DAT
///  header a default header is synthesized; with both present the chip-type
///  field following the second delimiter is rewritten to m_ChipType.
///
///  @return void
///////////////////////////////////////////////////////////////////////////////
void CCELFileHeaderData::SetDatHeader()
{
    if ((m_ChipType.length() > 0) && (m_DatHeader.length() == 0))
    {
        // Build a skeleton header whose third delimited field is "<chip>.1sq".
        char scanInfo[2000];
        snprintf(scanInfo,sizeof(scanInfo), " %c %c %s.1sq %c %c %c %c %c %c %c %c %c ",
            DELIMCHAR,
            DELIMCHAR,
            m_ChipType.c_str(),
            DELIMCHAR,
            DELIMCHAR,
            DELIMCHAR,
            DELIMCHAR,
            DELIMCHAR,
            DELIMCHAR,
            DELIMCHAR,
            DELIMCHAR,
            DELIMCHAR);
        m_DatHeader = scanInfo;
    }
    else if ((m_ChipType.length() > 0) && (m_DatHeader.length() > 0))
    {
        std::string temp = "";
        // Locate the second delimiter; the chip type starts two characters after it.
        std::string::size_type index = m_DatHeader.find(DELIMCHAR);
        if (index != std::string::npos)
        {
            index = m_DatHeader.find(DELIMCHAR, index + 1);
            if (index != std::string::npos)
            {
                std::string::size_type indexEnd = m_DatHeader.find(".1sq", index);
                if (indexEnd != std::string::npos)
                {
                    // NOTE(review): the compared length (indexEnd - index) looks two
                    // characters longer than the chip-type field; when the field
                    // already matches, the rewrite below reproduces the same string.
                    if (m_DatHeader.substr(index + 2, indexEnd - index) != m_ChipType)
                    {
                        temp = m_DatHeader.substr(0, index + 2);
                        temp += m_ChipType;
                        temp += m_DatHeader.substr(indexEnd);
                    }
                    else
                        temp = m_DatHeader;
                }
                else
                {
                    // No ".1sq" suffix: the field is taken to end at the next delimiter.
                    indexEnd = m_DatHeader.find(DELIMCHAR, index + 1);
                    if (indexEnd != std::string::npos)
                    {
                        if (m_DatHeader.substr(index + 2, indexEnd - index - 1) != m_ChipType)
                        {
                            temp = m_DatHeader.substr(0, index + 2);
                            temp += m_ChipType;
                            temp += ".1sq";
                            temp += m_DatHeader.substr(indexEnd);
                        }
                        else
                            temp = m_DatHeader;
                    }
                    // NOTE(review): with no third delimiter temp stays empty, so the
                    // assignment below empties m_DatHeader - confirm this is intended.
                }
            }
            else
                temp = m_DatHeader;
        }
        else
            temp = m_DatHeader;
        m_DatHeader = temp;
    }
}
/////////////////////////////////////////////////////////////////////////////// /// public ParseDatHeader /// \brief Parse DAT header from header string /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::ParseDatHeader() { const char *pInfo = strstr(m_Header.c_str(), "DatHeader="); if (pInfo != NULL) { const char *pEnd = strchr(pInfo, '\n'); if (pEnd != NULL) { char *str = new char [pEnd - pInfo - 10 + 1]; strncpy(str, pInfo + 10, pEnd - pInfo - 10); str[pEnd - pInfo - 10] = '\0'; m_DatHeader = str; delete [] str; } } } /////////////////////////////////////////////////////////////////////////////// /// public ParseChipType /// \brief Parse chip type of array from header string /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::ParseChipType() { // Determine the array type from the header. // chip type is between second and third delimiters const char *pInfo = strchr(m_Header.c_str(), DELIMCHAR); if (pInfo != NULL) { pInfo = strchr(pInfo + 1, DELIMCHAR); if ((pInfo != NULL) && *(++pInfo)) { pInfo++; const char *pEnd = strchr(pInfo, '.'); if (pEnd == NULL) { pEnd = strchr(pInfo, DELIMCHAR); if (pEnd != NULL) pEnd--; } if (pEnd != NULL) { char *str = new char [pEnd - pInfo + 1]; strncpy(str, pInfo, pEnd - pInfo); str[pEnd - pInfo] = '\0'; m_ChipType = str; delete [] str; } } } } /////////////////////////////////////////////////////////////////////////////// /// public ParseAlgorithmParameters /// \brief Parse algorithm parameters from single string into STL map /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::ParseAlgorithmParameters() { if (m_Params.length() > 0) { int iStart = 0; int iEnd = 0; int iLen = (int) m_Params.length() - 1; std::string tag = ""; std::string value = ""; for (int i = 0; i < iLen; i++) { if ((m_Params.at(i) == ':') || 
(m_Params.at(i) == '=')) { tag = m_Params.substr(iStart, (iEnd - iStart + 1)); iStart = i + 1; iEnd = iStart; } else if ((m_Params.at(i) == ';') || (m_Params.at(i) == ' ')) { value = m_Params.substr(iStart, (iEnd - iStart + 1)); AddAlgorithmParameter(tag, value); iStart = i + 1; iEnd = iStart; } else iEnd = i; } value = m_Params.substr(iStart, (iEnd - iStart + 2)); AddAlgorithmParameter(tag, value); } } /////////////////////////////////////////////////////////////////////////////// /// public overloaded AddAlgorithmParameter /// \brief Call AddAlgorithmParameter() to add algorithm paramter into STL map. /// If tag already exists, the addition will be ignored. /// /// @param tag const char * Algorithm parameter tag /// @param value const char * Algorithm parameter value /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::AddAlgorithmParameter(const char *tag, const char *value) { assert(tag != NULL); assert(value != NULL); std::string strTag = tag; std::string strValue = value; AddAlgorithmParameter(strTag, strValue); } /////////////////////////////////////////////////////////////////////////////// /// public overloaded AddAlgorithmParameter /// \brief Add algorithm parameter into STL map. /// If tag already exists, the addition will be ignored. 
/// /// @param tag std::string & Algorithm parameter tag /// @param value std::string & Algorithm parameter value /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::AddAlgorithmParameter(std::string& tag, std::string& value) { assert(tag != ""); assert(value != ""); std::map::iterator pos = m_Parameters.find(tag); if (pos == m_Parameters.end()) { int index = (int) m_Parameters.size() + 1; m_ParameterIndices.insert(std::make_pair(index, tag)); m_Parameters.insert(std::make_pair(tag, value)); } } /////////////////////////////////////////////////////////////////////////////// /// public SetAlgorithmParameter /// \brief Set algorithm parameter value of existing specified tag /// /// @param tag const char * Algorithm parameter tag /// @param value const char * Algorithm parameter value /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::SetAlgorithmParameter(const char *tag, const char *value) { assert(tag != NULL); assert(value != NULL); std::string strTag = tag; std::string strValue = value; std::map::iterator pos = m_Parameters.find(strTag); if (pos != m_Parameters.end()) pos->second = strValue; } /////////////////////////////////////////////////////////////////////////////// /// public SetAddAlgorithmParameter /// \brief Set or Add algorithm parameter value /// /// @param tag const char * Algorithm parameter tag /// @param value const char * Algorithm parameter value /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::SetAddAlgorithmParameter(const char *tag, const char *value) { assert(tag != NULL); assert(value != NULL); std::string strTag = tag; std::string strValue = value; SetAddAlgorithmParameter(strTag, strValue); } /////////////////////////////////////////////////////////////////////////////// /// public SetAddAlgorithmParameter /// \brief Set or Add 
algorithm parameter value /// /// @param tag const char * Algorithm parameter tag /// @param value const char * Algorithm parameter value /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::SetAddAlgorithmParameter(std::string& tag, std::string& value) { assert(tag != ""); assert(value != ""); std::map::iterator pos = m_Parameters.find(tag); if (pos != m_Parameters.end()) { pos->second = value; } else { int index = (int) m_Parameters.size() + 1; m_ParameterIndices.insert(std::make_pair(index, tag)); m_Parameters.insert(std::make_pair(tag, value)); } } /////////////////////////////////////////////////////////////////////////////// /// public GetAlgorithmParameter /// \brief Retrieve algorithm parameter value of specified tag /// /// @param tag const char * Algorithm parameter tag /// @return std::string Algorithm parameter value or NULL if tag is not found /////////////////////////////////////////////////////////////////////////////// std::string CCELFileHeaderData::GetAlgorithmParameter(const char *tag) { assert(tag != NULL); std::string strTag = tag; std::string strValue = ""; std::map::iterator pos = m_Parameters.find(strTag); if (pos != m_Parameters.end()) strValue = pos->second; return strValue; } /////////////////////////////////////////////////////////////////////////////// /// public GetAlgorithmParameterTag /// \brief Retrieve algorithm parameter name of specified index /// /// @param index int Algorithm parameter index /// @return std::string Algorithm parameter name or NULL if not found /////////////////////////////////////////////////////////////////////////////// std::string CCELFileHeaderData::GetAlgorithmParameterTag(int index) { std::string tag=""; std::map::iterator pos = m_ParameterIndices.find(index+1); if (pos != m_ParameterIndices.end()) { tag = pos->second; } return tag; } /////////////////////////////////////////////////////////////////////////////// /// public 
GetAlgorithmParameters /// \brief Retreive algorithm parameters from STL map and format them into a single string /// /// @return std::string Algorithm parameters /////////////////////////////////////////////////////////////////////////////// std::string CCELFileHeaderData::GetAlgorithmParameters() { std::string strBuffer = ""; std::string strTag = ""; std::string strValue = ""; int iCount = 0; std::map::iterator posIndex; std::map::iterator pos; for (int i = 0; i < (int) m_ParameterIndices.size(); i++) { posIndex = m_ParameterIndices.find(i + 1); if (posIndex != m_ParameterIndices.end()) { strTag = posIndex->second; pos = m_Parameters.find(strTag); if (pos != m_Parameters.end()) { strValue = pos->second; if (iCount > 0) strBuffer.append(";"); strBuffer.append(strTag); strBuffer.append(":"), strBuffer.append(strValue); iCount++; } } } return strBuffer; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetMargin /// \brief Set cell margin /// @param i int Cell margin /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::SetMargin(int i) { m_Margin = i; char sz[1024]; snprintf(sz,sizeof(sz), "%d", i); AddAlgorithmParameter("CellMargin", sz); } /////////////////////////////////////////////////////////////////////////////// /// public SetGridCorners /// \brief Set array grid coordinates /// /// @param grid GridCoordinatesType Grid coordinates /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::SetGridCorners(GridCoordinatesType grid) { m_CellGrid.upperleft.x = grid.upperleft.x; m_CellGrid.upperleft.y = grid.upperleft.y; m_CellGrid.upperright.x = grid.upperright.x; m_CellGrid.upperright.y = grid.upperright.y; m_CellGrid.lowerleft.x = grid.lowerleft.x; m_CellGrid.lowerleft.y = grid.lowerleft.y; m_CellGrid.lowerright.x = grid.lowerright.x; m_CellGrid.lowerright.y = grid.lowerright.y; } 
/////////////////////////////////////////////////////////////////////////////// /// public ParseCorners /// \brief Parse array grid coordinates from header string /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileHeaderData::ParseCorners() { // get grid coordinates and size from the header if (m_Header.length() > 0) { const char* pch = strstr(m_Header.c_str(), "GridCorner"); int ulx,uly; int urx,ury; int llx,lly; int lrx,lry; if ((pch != NULL) && (sscanf(pch, "GridCornerUL=%d %d GridCornerUR=%d %d GridCornerLR=%d %d GridCornerLL=%d %d", &ulx, &uly, &urx, &ury, &lrx, &lry, &llx, &lly) == 8)) { m_CellGrid.upperleft.x = ulx; m_CellGrid.upperleft.y = uly; m_CellGrid.upperright.x = urx; m_CellGrid.upperright.y = ury; m_CellGrid.lowerright.x = lrx; m_CellGrid.lowerright.y = lry; m_CellGrid.lowerleft.x = llx; m_CellGrid.lowerleft.y = lly; } } } /////////////////////////////////////////////////////////////////////////////// /// public Exists /// \brief Check if the CEL file exists by using the already set full path name /// /// @return bool true if exists; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::Exists() { if (ResolveName()=="") { return false; } return true; } std::string CCELFileData::ResolveName() { assert(m_FileName != ""); std::string name; struct stat st; // name name=m_FileName; if (stat(name.c_str(), &st) == 0) { return name; } // #ifdef CELFILE_USE_ZLIB name=m_FileName+".gz"; if (stat(name.c_str(), &st) == 0) { return name; } #endif // return ""; } /////////////////////////////////////////////////////////////////////////////// /// public GetFileSize /// \brief Retrieve file size /// /// @return uint32_t Size in bytes /////////////////////////////////////////////////////////////////////////////// uint32_t CCELFileData::GetFileSize() { std::string name=ResolveName(); assert(name != ""); int32_t lSize = 0; struct stat st; if 
(stat(name.c_str(), &st) == 0) { lSize = st.st_size; } return lSize; } /////////////////////////////////////////////////////////////////////////////// /// private Open /// \brief Determine the CEL file format and call corresponding function to read it /// /// @param bReadHeaderOnly bool [=false] Flag to determine if reading header section only /// @return bool true if success; false if fail /// /// \see DetermineFileFormat, ReadXDABCel, ReadTranscriptomeBCel, ReadCompactBCel, ReadTextCel /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::Open(bool bReadHeaderOnly) { bool retVal = false; // First close the file. Close(); DetermineFileFormat(); if (IsXDACompatibleFile()) retVal = ReadXDABCel(bReadHeaderOnly); else if (IsTranscriptomeBcelFile()) retVal = ReadTranscriptomeBCel(bReadHeaderOnly); else if (IsCompactCelFile()) retVal = ReadCompactBCel(bReadHeaderOnly); else if (IsUnsupportedCompactCelFile()) { SetError("This version of compact cel file is no longer supported."); retVal = false; } else retVal = ReadTextCel(bReadHeaderOnly); return retVal; } /////////////////////////////////////////////////////////////////////////////// /// private ReadXDABCel /// \brief Read xda format CEL file using memory mapping /// /// @param bReadHeaderOnly bool [=false] Flag to determine if reading header section only /// @return bool true if success; false if fail /// /// \remark Header section, masked cells and outlier information are read in memory. /// The reading of masked cells and outlier information is determined by the reading state /// which is set in ReadEx(). /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::ReadXDABCel(bool bReadHeaderOnly) { bool retVal = false; std::string tmp_FileName; tmp_FileName=ResolveName(); if (tmp_FileName=="") { SetError("File '"+m_FileName+"' not found"); return false; } #define OPEN_ERR_MSG "Unable to open the file." 
#ifdef CELFILE_USE_STDSTREAM // Open the file. std::ifstream instr; tmp_FileName=m_FileName; instr.open(tmp_FileName.c_str(), std::ios::in | std::ios::binary); // Check if open if (!instr) { SetError(OPEN_ERR_MSG " (ifstream)"); return false; } #endif // #ifdef CELFILE_USE_STDIO // FILE* instr; // tmp_FileName=m_FileName; // instr=fopen(tmp_FileName.c_str(),"rb"); // if (instr==NULL) { // SetError(OPEN_ERR_MSG " (stdio)"); // return false; // } // #endif #ifdef CELFILE_USE_ZLIB gzFile instr; // try the UNgziped file first. tmp_FileName=ResolveName(); instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { SetError(OPEN_ERR_MSG " (gzopen)"); return false; } //printf("### CCELFileData::ReadXDABCel('%s'): open1\n",tmp_FileName.c_str()); #endif Clear(); // Read the header int iHeaderBytes = 0; int32_t magic; int32_t version; int32_t nSubGrids; // char *sval=NULL; std::string tmp_str; // Read the magic number. ReadInt32_I(instr, magic); iHeaderBytes += INT_SIZE; m_HeaderData.SetMagic(magic); // Check if new type. if (!(magic==CELL_FILE_MAGIC_NUMBER)) { SetError("The file does not appear to be the correct format."); return false; } // Read the version ReadInt32_I(instr, version); iHeaderBytes += INT_SIZE; m_HeaderData.SetVersion(version); // Read the dimensions of the array int32_t iValue = 0; uint32_t ulValue = 0; ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetRows(iValue); ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetCols(iValue); ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetCells(iValue); // Read the other members. 
ReadCString_I(instr, tmp_str); iHeaderBytes += INT_SIZE; iHeaderBytes += tmp_str.size(); m_HeaderData.SetHeader(tmp_str.c_str()); ReadCString_I(instr, tmp_str); iHeaderBytes += INT_SIZE; iHeaderBytes += tmp_str.size(); m_HeaderData.SetAlg(tmp_str.c_str()); ReadCString_I(instr, tmp_str); iHeaderBytes += INT_SIZE; iHeaderBytes += tmp_str.size(); m_HeaderData.SetParams(tmp_str.c_str()); ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetMargin(iValue); ReadUInt32_I(instr, ulValue); iHeaderBytes += UINT32_SIZE; m_HeaderData.SetOutliers(ulValue); ReadUInt32_I(instr, ulValue); iHeaderBytes += UINT32_SIZE; m_HeaderData.SetMasked(ulValue); ReadInt32_I(instr, nSubGrids); iHeaderBytes += INT_SIZE; // Set the chip type and DatHeader m_HeaderData.ParseChipType(); m_HeaderData.ParseDatHeader(); // Parse algorithm parameters into map m_HeaderData.ParseAlgorithmParameters(); // Set grid coordinates m_HeaderData.ParseCorners(); #ifdef CELFILE_USE_STDSTREAM instr.close(); #endif #ifdef CELFILE_USE_STDIO fclose(instr); #endif #ifdef CELFILE_USE_ZLIB gzclose(instr); #endif // Read the remaining data. if (bReadHeaderOnly) return true; #ifdef CELFILE_USE_MEMMAP #ifdef _MSC_VER // Memory map file on windows... SYSTEM_INFO info; GetSystemInfo(&info); m_hFile = CreateFile(m_FileName.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL/*FILE_FLAG_RANDOM_ACCESS*/); if (m_hFile == INVALID_HANDLE_VALUE) { SetError("Failed to open the file for win memory mapping."); return false; } m_hFileMap = CreateFileMapping(m_hFile, NULL, PAGE_READONLY, 0, 0, NULL); if (m_hFileMap != NULL) { m_lpFileMap = MapViewOfFile(m_hFileMap, FILE_MAP_READ, 0, 0, 0); if (m_lpFileMap == NULL) { Close(); SetError("Unable to map view for the win memory map file."); return false; } else { m_lpData = (char *)(m_lpFileMap) + iHeaderBytes; } } #else // Memory map file on posix... 
int32_t lFileSize = GetFileSize(); char* szBuffer = new char[iHeaderBytes + 1]; m_File = fopen(m_FileName.c_str(), "r"); if (m_File == NULL) { SetError("Failed to open the file for memory mapping."); return false; } fread(szBuffer, iHeaderBytes, 1, m_File); // printf("fpos=%u",ftell(m_File)); // debug delete [] szBuffer; size_t cellf_page_start = PAGE_TRUNC(ftell(m_File)); size_t cellf_page_offset = ftell(m_File) - cellf_page_start; m_MapLen = lFileSize - cellf_page_start; m_lpFileMap = mmap(NULL, m_MapLen, PROT_READ, MAP_SHARED, fileno(m_File), cellf_page_start); //printf("=== mmap: %p\n",m_lpFileMap); //jhg if (m_lpFileMap == MAP_FAILED) { Close(); static char buf[2048]; sprintf(buf, "Unable to map view for the unix memory map file: %d", errno); SetError(buf); return false; } else { m_lpData = (char *)(m_lpFileMap) + cellf_page_offset; } if (m_File != NULL) { fclose(m_File); m_File = NULL; } #endif // _MSC_VER #else // No memory mapping ... #ifdef CELFILE_USE_ZLIB // take a guess at how big the uncompressed file is. int alloc_size=(20*1024) + // approx header ((FLOAT_SIZE + FLOAT_SIZE + SHORT_SIZE)*(GetRows()*GetCols())) + // row*col ((SHORT_SIZE + SHORT_SIZE) * GetNumMasked()) + // masked ((SHORT_SIZE + SHORT_SIZE) * GetNumOutliers()); // outliers // alloc it m_lpData = new char[alloc_size]; // do the read -- must be the same file name as above. instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { SetError("gzip open failed - take2"); return false; } gzseek(instr,iHeaderBytes,SEEK_SET); int read_size=gzread(instr,m_lpData,alloc_size); //printf("### CCELFileData::ReadXDABCel('%s')=%d (alloc=%d)\n",tmp_FileName.c_str(),read_size,alloc_size); gzclose(instr); #else #ifdef _MSC_VER // Memory map file on windows... 
SYSTEM_INFO info; GetSystemInfo(&info); m_hFile = CreateFile(m_FileName.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL/*FILE_FLAG_RANDOM_ACCESS*/); if (m_hFile == INVALID_HANDLE_VALUE) { SetError("Failed to open the file for win memory mapping."); return false; } m_hFileMap = CreateFileMapping(m_hFile, NULL, PAGE_READONLY, 0, 0, NULL); if (m_hFileMap != NULL) { m_lpFileMap = MapViewOfFile(m_hFileMap, FILE_MAP_READ, 0, 0, 0); if (m_lpFileMap == NULL) { Close(); SetError("Unable to map view for the win memory map file."); return false; } else { m_lpData = (char *)(m_lpFileMap) + iHeaderBytes; } } #else // printf("OPEN: non memory mapped.\n"); // m_lpFileMap=NULL; m_File=fopen(m_FileName.c_str(),"r"); if (m_File==NULL) { SetError("Failed to open file for non-memap open."); return false; } // skip the header fseek(m_File,iHeaderBytes,SEEK_SET); // printf("fpos=%u",ftell(m_File)); // debug // read the entire file into memory... int32_t datasize = GetFileSize()-iHeaderBytes; m_lpData = new char[datasize]; size_t read=fread(m_lpData,1,datasize,m_File); //printf("read=%u iHeaderBytes=%u\n",read,iHeaderBytes); if (read!=datasize) { SetError("Unable to read the entire file."); return false; } // fclose(m_File); #endif #endif // CELFILE_USE_ZLIB #endif // CELFILE_USE_MEMMAP int16_t x=0; int16_t y=0; int iCell; // "Read" the Mean data m_pEntries = (CELFileEntryType*)m_lpData; // Read the mask data int iOffset = m_HeaderData.GetCells() * (FLOAT_SIZE + FLOAT_SIZE + SHORT_SIZE); if (m_bReadMaskedCells) { for (iCell = 0; iCell < (int) m_HeaderData.GetMasked(); iCell++) { // Read the coordinate. 
x = ((int16_t)MmGetUInt16_I((uint16_t*)(m_lpData + iOffset + iCell * 2 * SHORT_SIZE))); //x = GetShort((short*) (m_lpData + iOffset + iCell * 2 * SHORT_SIZE), m_FileFormat); y = ((int16_t)MmGetUInt16_I((uint16_t*)(m_lpData + iOffset + iCell * 2 * SHORT_SIZE + SHORT_SIZE))); // y = GetShort((short*) (m_lpData + iOffset + iCell * 2 * SHORT_SIZE + SHORT_SIZE), m_FileFormat); m_MaskedCells.insert(std::make_pair(y * m_HeaderData.GetCols() + x, true)); } } // Read the outlier data iOffset += m_HeaderData.GetMasked() * 2 * SHORT_SIZE; if (m_bReadOutliers) { for (iCell = 0; iCell < (int) m_HeaderData.GetOutliers(); iCell++) { // Read the coordinate. x = ((int16_t)MmGetUInt16_I((uint16_t*)(m_lpData + iOffset + iCell * 2 * SHORT_SIZE))); y = ((int16_t)MmGetUInt16_I((uint16_t*)(m_lpData + iOffset + iCell * 2 * SHORT_SIZE + SHORT_SIZE))); // x = GetShort((short*) (m_lpData + iOffset + iCell * 2 * SHORT_SIZE), m_FileFormat); // y = GetShort((short*) (m_lpData + iOffset + iCell * 2 * SHORT_SIZE + SHORT_SIZE), m_FileFormat); m_Outliers.insert(std::make_pair(y * m_HeaderData.GetCols() + x, true)); } } else m_HeaderData.SetOutliers(0); if (!m_bReadMaskedCells) m_HeaderData.SetMasked(0); retVal = true; return retVal; } /////////////////////////////////////////////////////////////////////////////// /// private ReadTranscriptomeBCel /// \brief Read transcriptome binary CEL file using memory mapping /// /// @param bReadHeaderOnly bool [=false] Flag to determine if reading header section only /// @return bool true if success; false if fail /// /// \remark Header section, masked cells and outlier information are read in memory. /// The reading of masked cells and outlier information is determined by the reading state /// which is set in ReadEx(). 
/////////////////////////////////////////////////////////////////////////////// bool CCELFileData::ReadTranscriptomeBCel(bool bReadHeaderOnly) { std::string tmp_FileName=ResolveName(); bool retVal = false; #ifdef CELFILE_USE_STDSTREAM // Open the file. std::ifstream instr; tmp_FileName=m_FileName; instr.open(tmp_FileName.c_str(), std::ios::in | std::ios::binary); // Check if open if (!instr) { SetError(OPEN_ERR_MSG " (ifstream)"); return false; } #endif #ifdef CELFILE_USE_ZLIB gzFile instr; // try the UNgziped file first. tmp_FileName=ResolveName(); instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { SetError(OPEN_ERR_MSG " (gzopen)"); return false; } //printf("### CCELFileData::ReadTranscriptomeBCel('%s'): open1\n",tmp_FileName.c_str()); #endif Clear(); // Read the header int iHeaderBytes = 0; float version = 0; uint32_t ulValue = 0; uint32_t totalx = 0; uint32_t totaly = 0; uint32_t offsetx = 0; uint32_t offsety = 0; GridCoordinatesType grid; unsigned short axisinvertx = 0; unsigned short axisinverty = 0; unsigned short swapxy = 0; std::string scanInfo; std::string sval; // Read the magic number. ReadFixedString(instr, sval, BCEL_HEADER_LEN); iHeaderBytes += BCEL_HEADER_LEN; // Check if new type. 
if (strncmp(sval.c_str(), BCEL_HEADER_BYTES, BCEL_HEADER_LEN) != 0) { SetError("The file does not appear to be the correct format."); return false; } // Read the version ReadFloat_N(instr, version); iHeaderBytes += FLOAT_SIZE; m_HeaderData.SetVersion((int)version); // Read header information ReadUInt32_N(instr, ulValue); // header struct size iHeaderBytes += (2 * UINT32_SIZE + ulValue + BCEL_CHUNK_NAME_SIZE); ReadFixedString(instr, sval, BCEL_CHUNK_NAME_SIZE); ReadFloat_N(instr, version); ReadUInt32_N(instr, ulValue); // columns m_HeaderData.SetCols(ulValue); ReadUInt32_N(instr, ulValue); // rows m_HeaderData.SetRows(ulValue); ReadUInt32_N(instr, totalx); // totalx ReadUInt32_N(instr, totaly); // totaly ReadUInt32_N(instr, offsetx); // offsetx ReadUInt32_N(instr, offsety); // offsety ReadUInt32_N(instr, ulValue); // GridCornerUL.x grid.upperleft.x = ulValue; ReadUInt32_N(instr, ulValue); // GridCornerUL.y grid.upperleft.y = ulValue; ReadUInt32_N(instr, ulValue); // GridCornerUR.x grid.upperright.x = ulValue; ReadUInt32_N(instr, ulValue); // GridCornerUR.y grid.upperright.y = ulValue; ReadUInt32_N(instr, ulValue); // GridCornerLL.x grid.lowerleft.x = ulValue; ReadUInt32_N(instr, ulValue); // GridCornerLL.y grid.lowerleft.y = ulValue; ReadUInt32_N(instr, ulValue); // GridCornerLR.x grid.lowerright.x = ulValue; ReadUInt32_N(instr, ulValue); // GridCornerLR.y grid.lowerright.y = ulValue; m_HeaderData.SetGridCorners(grid); ReadUInt16_N(instr, axisinvertx); // Axis-invertX ReadUInt16_N(instr, axisinverty); // AxisinvertY ReadUInt16_N(instr, swapxy); // swapXY ReadUInt32_N(instr, ulValue); // Read DatHeader ReadUInt32_N(instr, ulValue); // DatHeader length iHeaderBytes += (2 * UINT32_SIZE + ulValue + BCEL_CHUNK_NAME_SIZE); ReadFixedString(instr, sval, BCEL_CHUNK_NAME_SIZE); ReadFixedString(instr, scanInfo, ulValue); if (scanInfo[ulValue - 1] == '\r') scanInfo[ulValue - 1] = '\0'; m_HeaderData.SetDatHeader(scanInfo.c_str()); ReadUInt32_N(instr, ulValue); // Read Algorithm 
ReadUInt32_N(instr, ulValue); // Algorithm length iHeaderBytes += (2 * UINT32_SIZE + ulValue + BCEL_CHUNK_NAME_SIZE); ReadFixedString(instr, sval, BCEL_CHUNK_NAME_SIZE); ReadFixedString(instr, sval, ulValue); if (sval[ulValue - 1] == '\r') sval[ulValue - 1] = '\0'; m_HeaderData.SetAlg(sval.c_str()); ReadUInt32_N(instr, ulValue); // Read AlgorithmParameters ReadUInt32_N(instr, ulValue); // AlgorithmParameters length iHeaderBytes += (2 * UINT32_SIZE + ulValue + BCEL_CHUNK_NAME_SIZE); ReadFixedString(instr, sval, BCEL_CHUNK_NAME_SIZE); ReadFixedString(instr, sval, ulValue); if (sval[ulValue - 1] == '\r') sval[ulValue - 1] = '\0'; m_HeaderData.SetParams(sval.c_str()); ReadUInt32_N(instr, ulValue); char paramString[2000]; snprintf(paramString,sizeof(paramString), "Cols=%d\nRows=%d\n" "TotalX=%u\nTotalY=%u\nOffsetX=%u\nOffsetY=%u\n" "GridCornerUL=%d %d\nGridCornerUR=%d %d\n" "GridCornerLR=%d %d\nGridCornerLL=%d %d\n" "Axis-invertX=%d\nAxisInvertY=%d\nswapXY=%d\nDatHeader=%s\n" "Algorithm=%s\nAlgorithmParameters=", m_HeaderData.GetCols(),m_HeaderData.GetRows(), totalx,totaly,offsetx,offsety, grid.upperleft.x,grid.upperleft.y, grid.upperright.x,grid.upperright.y, grid.lowerright.x,grid.lowerright.y, grid.lowerleft.x,grid.lowerleft.y, axisinvertx,axisinverty,swapxy, scanInfo.c_str(), m_HeaderData.GetAlg().c_str()); std::string param = paramString; param += m_HeaderData.GetParams(); param += "\n"; m_HeaderData.SetHeader(param.c_str()); // Read cell size ReadUInt32_N(instr, ulValue); int iOffset = ulValue; m_HeaderData.SetCells(ulValue / STRUCT_SIZE_FEATURE_DATA); ReadFixedString(instr, sval, BCEL_CHUNK_NAME_SIZE); iHeaderBytes += (UINT32_SIZE + BCEL_CHUNK_NAME_SIZE); // Set the chip type m_HeaderData.ParseChipType(); // Parse algorithm parameters into map m_HeaderData.ParseAlgorithmParameters(); m_HeaderData.SetMargin(atoi(m_HeaderData.GetAlgorithmParameter("CellMargin").c_str())); // Set grid coordinates m_HeaderData.ParseCorners(); // Read the remaining data. 
if (bReadHeaderOnly) return true; #ifdef CELFILE_USE_STDSTREAM instr.close(); #endif #ifdef CELFILE_USE_ZLIB gzclose(instr); #endif #ifdef CELFILE_USE_MEMMAP // Memory map file #ifdef _MSC_VER SYSTEM_INFO info; GetSystemInfo(&info); m_hFile = CreateFile(m_FileName.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL/*FILE_FLAG_RANDOM_ACCESS*/); if (m_hFile == INVALID_HANDLE_VALUE) { SetError("Failed to open the file for memory mapping."); return false; } m_hFileMap = CreateFileMapping(m_hFile, NULL, PAGE_READONLY, 0, 0, NULL); if (m_hFileMap != NULL) { m_lpFileMap = MapViewOfFile(m_hFileMap, FILE_MAP_READ, 0, 0, 0); if (m_lpFileMap == NULL) { Close(); SetError("Unable to map view for the win memory map file."); return false; } else { m_lpData = (char *)(m_lpFileMap) + iHeaderBytes; } } #else // posix int32_t lFileSize = GetFileSize(); char* szBuffer = new char[iHeaderBytes + 1]; m_File = fopen(m_FileName.c_str(), "r"); if (m_File == NULL) { SetError("Failed to open the file for memory mapping."); return false; } fread(szBuffer, iHeaderBytes, 1, m_File); delete [] szBuffer; size_t cellf_page_start = PAGE_TRUNC(ftell(m_File)); size_t cellf_page_offset = ftell(m_File) - cellf_page_start; #ifdef __CYGWIN__ cellf_page_offset -= 1; #endif m_MapLen = lFileSize - cellf_page_start; m_lpFileMap = mmap(NULL, m_MapLen, PROT_READ, MAP_SHARED, fileno(m_File), cellf_page_start); if (m_lpFileMap == MAP_FAILED) { Close(); static char buf[2048]; sprintf(buf, "Unable to map view for the unix memory map file: %d", errno); SetError(buf); return false; } else { m_lpData = (char *)(m_lpFileMap) + cellf_page_offset; } if (m_File != NULL) { fclose(m_File); m_File = NULL; } #endif // if _msc_ver #endif // celfile_use_mmap #ifdef CELFILE_USE_STDSTREAM int alloc_size=GetFileSize(); m_lpData=new char[alloc_size]; instr.open(tmp_FileName.c_str(),std::ios::in | std::ios::binary); instr.seekg(iHeaderBytes,std::ios::beg); 
instr.read(m_lpData,alloc_size-iHeaderBytes); instr.close(); #endif #ifdef CELFILE_USE_ZLIB int alloc_size= ((SHORT_SIZE+SHORT_SIZE+CHAR_SIZE)*(GetRows()*GetCols())) + // row*col (100*1024) // add 100k for masked and outliers ; m_lpData=new char[alloc_size]; instr=gzopen(tmp_FileName.c_str(),"rb"); gzseek(instr,iHeaderBytes,SEEK_SET); int read_size=gzread(instr,m_lpData,alloc_size); //printf("### CCELFileData::ReadCompactCelFile('%s')=%d (alloc=%d)\n",tmp_FileName.c_str(),read_size,alloc_size); gzclose(instr); #endif // Read the Mean data uint32_t x = 0; uint32_t y = 0; int iCell; m_pTransciptomeEntries = (CELFileTranscriptomeEntryType*) m_lpData; // Read mask size iOffset = m_HeaderData.GetCells() * STRUCT_SIZE_FEATURE_DATA + UINT32_SIZE; ulValue = MmGetUInt32_N((uint32_t*)(m_lpData + iOffset)); m_HeaderData.SetMasked(ulValue / STRUCT_SIZE_XY_PAIR); iOffset += (UINT32_SIZE + BCEL_CHUNK_NAME_SIZE); // Read the mask data if (m_bReadMaskedCells) { for (iCell = 0; iCell < (int)m_HeaderData.GetMasked(); iCell++) { // Read the coordinate. x = MmGetUInt32_N((uint32_t*) (m_lpData + iOffset + iCell * 2 * UINT32_SIZE)); y = MmGetUInt32_N((uint32_t*) (m_lpData + iOffset + iCell * 2 * UINT32_SIZE + UINT32_SIZE)); m_MaskedCells.insert(std::make_pair(y * m_HeaderData.GetCols() + x, true)); } } iOffset += (m_HeaderData.GetMasked() * STRUCT_SIZE_XY_PAIR + UINT32_SIZE); // Read outlier size ulValue = MmGetUInt32_N((uint32_t*)(m_lpData + iOffset)); m_HeaderData.SetOutliers(ulValue / STRUCT_SIZE_XY_PAIR); iOffset += (UINT32_SIZE + BCEL_CHUNK_NAME_SIZE); // Read the outlier data if (m_bReadOutliers) { for (iCell = 0; iCell < (int)m_HeaderData.GetOutliers(); iCell++) { // Read the coordinate. 
x = MmGetUInt32_N((uint32_t*) (m_lpData + iOffset + iCell * 2 * UINT32_SIZE)); y = MmGetUInt32_N((uint32_t*) (m_lpData + iOffset + iCell * 2 * UINT32_SIZE + UINT32_SIZE)); m_Outliers.insert(std::make_pair(y * m_HeaderData.GetCols() + x, true)); } } else m_HeaderData.SetOutliers(0); if (!m_bReadMaskedCells) m_HeaderData.SetMasked(0); retVal = true; return retVal; } /////////////////////////////////////////////////////////////////////////////// /// private ReadCompactBCel /// \brief Read compact binary CEL file using memory mapping /// /// @param bReadHeaderOnly bool [=false] Flag to determine if reading header section only /// @return bool true if success; false if fail /// /// \remark Header section is read in memory. /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::ReadCompactBCel(bool bReadHeaderOnly) { bool retVal = false; std::string tmp_FileName; tmp_FileName=ResolveName(); if (tmp_FileName=="") { SetError("File '"+m_FileName+"' not found"); return false; } // Open the file. #ifdef CELFILE_USE_STDSTREAM std::ifstream instr; instr.open(tmp_FileName.c_str(), std::ios::in | std::ios::binary); // Check if open if (!instr) { SetError("Unable to open the file."); return false; } #endif #ifdef CELFILE_USE_ZLIB gzFile instr; instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { SetError("Unable to open the file." " (gzopen)"); return false; } //printf("### CCELFileData::ReadCompactBCel('%s')\n",tmp_FileName.c_str()); #endif Clear(); // Read the header int iHeaderBytes = 0; int32_t version; int32_t nSubGrids; std::string tmpstr; // Read the magic number. std::string magic; ReadFixedString(instr, magic, CCEL_HEADER_LEN); iHeaderBytes += CCEL_HEADER_LEN; // Check if new type. 
if (strncmp(magic.c_str(), CCEL_HEADER_BYTES, CCEL_HEADER_LEN) != 0) { SetError("The file does not appear to be the correct format."); return false; } // Read the version ReadInt32_I(instr, version); iHeaderBytes += INT_SIZE; m_HeaderData.SetVersion(version); // Read the dimensions of the array int32_t iValue = 0; uint32_t ulValue = 0; ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetRows(iValue); ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetCols(iValue); ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetCells(iValue); // Read the other members. ReadCString_I(instr, tmpstr); iHeaderBytes += INT_SIZE; iHeaderBytes += tmpstr.size(); m_HeaderData.SetHeader(tmpstr.c_str()); ReadCString_I(instr,tmpstr); iHeaderBytes += INT_SIZE; iHeaderBytes += tmpstr.size(); m_HeaderData.SetAlg(tmpstr.c_str()); ReadCString_I(instr, tmpstr); iHeaderBytes += INT_SIZE; iHeaderBytes += tmpstr.size(); m_HeaderData.SetParams(tmpstr.c_str()); ReadInt32_I(instr, iValue); iHeaderBytes += INT_SIZE; m_HeaderData.SetMargin(iValue); // Added -- causes backwards compatability issues w/ earlier pre-releases ReadUInt32_I(instr, ulValue); iHeaderBytes += UINT32_SIZE; m_HeaderData.SetMasked(ulValue); ReadInt32_I(instr, nSubGrids); iHeaderBytes += INT_SIZE; // This hack needs to be changed. In short, the application of ccel to date // is on chips with 0 sub grids reported. If we are dealing with a ccel file // from a pre inclusion of mask values we will get non-zero subgrids. Hence // the assert: assert(nSubGrids == 0); // Set the chip type and DatHeader m_HeaderData.ParseChipType(); m_HeaderData.ParseDatHeader(); // Parse algorithm parameters into map m_HeaderData.ParseAlgorithmParameters(); // Set grid coordinates m_HeaderData.ParseCorners(); #ifdef CELFILE_USE_STDSTREAM instr.close(); #endif #ifdef CELFILE_USE_ZLIB gzclose(instr); #endif // Read the remaining data. 
if (bReadHeaderOnly) return true; // Memory map file #ifdef CELFILE_USE_MEMMAP #ifdef _MSC_VER SYSTEM_INFO info; GetSystemInfo(&info); m_hFile = CreateFile(m_FileName.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL/*FILE_FLAG_RANDOM_ACCESS*/); if (m_hFile == INVALID_HANDLE_VALUE) { SetError("Failed to open the file for memory mapping."); return false; } m_hFileMap = CreateFileMapping(m_hFile, NULL, PAGE_READONLY, 0, 0, NULL); if (m_hFileMap != NULL) { m_lpFileMap = MapViewOfFile(m_hFileMap, FILE_MAP_READ, 0, 0, 0); if (m_lpFileMap == NULL) { Close(); SetError("Unable to map view for the win memory map file."); return false; } else { m_lpData = (char *)(m_lpFileMap) + iHeaderBytes; } } #else int32_t lFileSize = GetFileSize(); char* szBuffer = new char[iHeaderBytes + 1]; m_File = fopen(m_FileName.c_str(), "r"); if (m_File == NULL) { SetError("Failed to open the file for memory mapping."); return false; } fread(szBuffer, iHeaderBytes, 1, m_File); delete [] szBuffer; size_t cellf_page_start = PAGE_TRUNC(ftell(m_File)); size_t cellf_page_offset = ftell(m_File) - cellf_page_start; #ifdef __CYGWIN__ cellf_page_offset -= 1; #endif m_MapLen = lFileSize - cellf_page_start; m_lpFileMap = mmap(NULL, m_MapLen, PROT_READ, MAP_SHARED, fileno(m_File), cellf_page_start); if (m_lpFileMap == MAP_FAILED) { Close(); static char buf[2048]; sprintf(buf, "Unable to map view for the unix memory map file: %d", errno); SetError(buf); return false; } else { m_lpData = (char *)(m_lpFileMap) + cellf_page_offset; } if (m_File != NULL) { fclose(m_File); m_File = NULL; } #endif #else // no-mem-map int alloc_size=(20*1024)+ ((SHORT_SIZE)*(GetRows()*GetCols())) + // row*col ((SHORT_SIZE + SHORT_SIZE) * GetNumMasked()) + // masked ((SHORT_SIZE + SHORT_SIZE) * GetNumOutliers()); // outliers m_lpData=new char[alloc_size]; #ifdef CELFILE_USE_STDSTREAM instr.open(tmp_FileName.c_str(),std::ios::in | std::ios::binary); instr.seekg(iHeaderBytes,std::ios::beg); 
instr.read(m_lpData,alloc_size-iHeaderBytes); #endif #ifdef CELFILE_USE_ZLIB instr=gzopen(tmp_FileName.c_str(),"rb"); gzseek(instr,iHeaderBytes,SEEK_SET); int read_size=gzread(instr,m_lpData,alloc_size); //printf("### CCELFileData::ReadCompactCelFile('%s')=%d (alloc=%d)\n",tmp_FileName.c_str(),read_size,alloc_size); gzclose(instr); #endif #endif // Read the Mean data m_pMeanIntensities = (uint16_t*)m_lpData; // Read the mask data int16_t x=0; int16_t y=0; int iOffset = m_HeaderData.GetCells() * USHORT_SIZE; if (m_bReadMaskedCells) { for (int iCell = 0; iCell < (int) m_HeaderData.GetMasked(); iCell++) { // Read the coordinate. x = ((int16_t)MmGetUInt16_I((uint16_t*)(m_lpData + iOffset + iCell * 2 * USHORT_SIZE))); //x = GetShort((short*) (m_lpData + iOffset + iCell * 2 * SHORT_SIZE), m_FileFormat); y = ((int16_t)MmGetUInt16_I((uint16_t*)(m_lpData + iOffset + iCell * 2 * USHORT_SIZE + USHORT_SIZE))); // y = GetShort((short*) (m_lpData + iOffset + iCell * 2 * SHORT_SIZE + SHORT_SIZE), m_FileFormat); m_MaskedCells.insert(std::make_pair(y * m_HeaderData.GetCols() + x, true)); } } else m_HeaderData.SetMasked(0); retVal = true; return retVal; } /////////////////////////////////////////////////////////////////////////////// /// public EnsureNotMmapped /// \brief Store data in memory if CEL file is memory mapped /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::EnsureNotMmapped() { // skip if not mmapped... if (m_lpFileMap==NULL) { return; } // it is mmapped, so fix it. 
if (m_FileFormat == TRANSCRIPTOME_BCEL) // the (short,short,char) format { assert(m_pTransciptomeEntries != NULL); // duplicate the data size_t bytecnt = GetCols() * GetRows() * sizeof(CELFileTranscriptomeEntryType); CELFileTranscriptomeEntryType *tmpptr = (CELFileTranscriptomeEntryType *) malloc(bytecnt); memcpy(tmpptr, m_pTransciptomeEntries, bytecnt); // discard the old map Munmap(); // put it in place m_pTransciptomeEntries = tmpptr; } else if (m_FileFormat == XDA_BCEL) // the (float,float,short) format { assert(m_pEntries != NULL); // duplicate the data size_t bytecnt = GetCols() * GetRows() * sizeof(CELFileEntryType); CELFileEntryType *tmpptr = (CELFileEntryType *) malloc(bytecnt); memcpy(tmpptr, m_pEntries, bytecnt); // discard the old map Munmap(); // put it in place m_pEntries = tmpptr; } else if (m_FileFormat == COMPACT_BCEL) // the (short) format { assert(m_pMeanIntensities != NULL); // duplicate the data size_t bytecnt = GetCols() * GetRows() * sizeof(unsigned short); unsigned short *tmpptr = (unsigned short *) malloc(bytecnt); memcpy(tmpptr, m_pMeanIntensities, bytecnt); // discard the old map Munmap(); // put it in place m_pMeanIntensities = tmpptr; } } /////////////////////////////////////////////////////////////////////////////// /// private ReadTextCel /// \brief Read text CEL file in memory /// /// @param bReadHeaderOnly bool [=false] Flag to determine if reading header section only /// @return bool true if success; false if fail /// /// \remark The reading of masked cells and outlier information is determined by the reading state /// which is set in ReadEx(). /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::ReadTextCel(bool bReadHeaderOnly) { bool retVal = false; std::string tmp_FileName; tmp_FileName=ResolveName(); if (tmp_FileName=="") { SetError("Cant find file: '"+m_FileName+"'"); return false; } //#ifdef CELFILE_USE_STDSTREAM // Open the file. 
std::ifstream instr; instr.open(tmp_FileName.c_str(), std::ios::in); // Check if open if (!instr) { SetError("Unable to open the file." " (stdstream)"); return false; } //#endif //#ifdef CELFILE_USE_ZLIB // gzFile instr; // instr=gzopen(tmp_FileName.c_str(),"rb"); // if (instr==NULL) { // SetError("Unable to open the file." " (gzopen)"); // } //#endif Clear(); const int MAXLINELENGTH = 4096; char pszHeader[MAXLINELENGTH]; char tempStr[MAXLINELENGTH]; // Extract a line of header //#ifdef CELFILE_USE_STDSTREAM instr.getline(pszHeader, sizeof(pszHeader)); //#endif //#ifdef CELFILE_USE_ZLIB // gzgets(instr,pszHeader,sizeof(pszHeader)); //#endif //Determine the version number if (strncmp(pszHeader,"[CEL]",5)==0) m_HeaderData.SetVersion(3); else if (strncmp(pszHeader,"COLS/ROWS=",10)==0) m_HeaderData.SetVersion(2); else { SetError("Unrecognized CEL file format."); return false; } //read and store the header if (m_HeaderData.GetVersion() == 2) { m_HeaderData.SetHeader(pszHeader); instr.getline(pszHeader, MAXLINELENGTH); m_HeaderData.AppendHeader(pszHeader); instr.getline(pszHeader, MAXLINELENGTH); // Store rows and columns int iCols = 0; int iRows = 0; sscanf(m_HeaderData.GetHeader().c_str(), "COLS/ROWS=%d %d", &iCols, &iRows); m_HeaderData.SetCols(iCols); m_HeaderData.SetRows(iRows); m_HeaderData.SetCells(iCols*iRows); } else { bool moreSpace=true; //Read past [HEADER] to first line of header data while (moreSpace) { if (!instr.getline(pszHeader, MAXLINELENGTH)) return false; if (strncmp(pszHeader,"[HEADER]",8)==0) moreSpace=false; } int iCols = 0; int iRows = 0; instr.getline(pszHeader, MAXLINELENGTH); sscanf(pszHeader,"Cols=%d",&iCols); m_HeaderData.SetCols(iCols); m_HeaderData.SetHeader(pszHeader); instr.getline(pszHeader, MAXLINELENGTH); sscanf(pszHeader,"Rows=%d",&iRows); m_HeaderData.SetRows(iRows); m_HeaderData.AppendHeader(pszHeader); m_HeaderData.SetCells(iRows*iCols); //Now read the rest of the header bool moreHeader=true; while (moreHeader) { if 
(!instr.getline(pszHeader, MAXLINELENGTH)) return false; if (strncmp(pszHeader,"DatHeader=",10)==0) //Last line of the header { if (pszHeader[strlen(pszHeader) - 1] == '\r') pszHeader[strlen(pszHeader) - 1] = '\0'; m_HeaderData.SetDatHeader(pszHeader + 10); } if (strncmp(pszHeader,"Algorithm=",10)==0) //Last line of the header { sscanf(pszHeader,"Algorithm=%s",tempStr); m_HeaderData.SetAlg(tempStr); } if (strncmp(pszHeader,"AlgorithmParameters=",20)==0) //Last line of the header { sscanf(pszHeader,"AlgorithmParameters=%s",tempStr); m_HeaderData.SetParams(tempStr); moreHeader=false; } m_HeaderData.AppendHeader(pszHeader); } } // Set the chip type m_HeaderData.ParseChipType(); // Parse algorithm parameters into map m_HeaderData.ParseAlgorithmParameters(); m_HeaderData.SetMargin(atoi(m_HeaderData.GetAlgorithmParameter("CellMargin").c_str())); // Set grid coordinates m_HeaderData.ParseCorners(); char paramString[2000]; GridCoordinatesType grid = m_HeaderData.GetGridCorners(); snprintf(paramString,sizeof(paramString), "Cols=%d\nRows=%d\nTotalX=%d\nTotalY=%d\nOffsetX=%d\nOffsetY=%d\n" "GridCornerUL=%d %d\nGridCornerUR=%d %d\nGridCornerLR=%d %d\nGridCornerLL=%d %d\n" "Axis-invertX=%d\nAxisInvertY=%d\nswapXY=%d\nDatHeader=%s" "\nAlgorithm=%s\nAlgorithmParameters=", m_HeaderData.GetCols(),m_HeaderData.GetRows(), m_HeaderData.GetCols(),m_HeaderData.GetRows(), 0,0, grid.upperleft.x, grid.upperleft.y, grid.upperright.x, grid.upperright.y, grid.lowerright.x, grid.lowerright.y, grid.lowerleft.x, grid.lowerleft.y, 0,0,0, m_HeaderData.GetDatHeader().c_str(), m_HeaderData.GetAlg().c_str()); std::string param = paramString; param += m_HeaderData.GetAlgorithmParameters(); param += "\n"; m_HeaderData.SetHeader(param.c_str()); // Don't continue if just reading the header. if (bReadHeaderOnly) return true; // Create memory for Mean data. 
m_HeaderData.SetCells(m_HeaderData.GetRows() * m_HeaderData.GetCols()); m_pEntries = new CELFileEntryType[m_HeaderData.GetCells()]; int t_x,t_y,t_pixels; float t_mean,t_stdv; // Read v2 CEL files if (m_HeaderData.GetVersion() == 2) { // Write the Mean data const char *strCellEntryFormat="%d %d %f %f %d"; for (int iCell=0; iCell < m_HeaderData.GetCells(); iCell++) { instr.getline(pszHeader, MAXLINELENGTH); sscanf(pszHeader, strCellEntryFormat, &t_x, &t_y, &t_mean, &t_stdv, &t_pixels); SetIntensity(t_x,t_y,t_mean); SetStdv(t_x,t_y,t_stdv); SetPixels(t_x,t_y,t_pixels); } retVal = true; } else { //Advance to the beginning of the Mean data bool readMore=true; while(readMore) { if (!instr.getline(pszHeader, MAXLINELENGTH)) return false; if (strncmp(pszHeader,"[INTENSITY]",11)==0) readMore=false; } instr.getline(pszHeader, MAXLINELENGTH);//Data starts at 2 lines past [Mean] instr.getline(pszHeader, MAXLINELENGTH);//Data starts at 2 lines past [Mean] //Read the Mean data int iCell=0; readMore=true; const char *strCellEntryFormat="%d\t%d\t%f\t%f\t%d"; while (readMore) { if (!instr.getline(pszHeader, MAXLINELENGTH)) //end of file readMore=false; else if (strlen(pszHeader) < MIN_CELLSTR )// blank line at end of data readMore=false; else { sscanf(pszHeader, strCellEntryFormat, &t_x, &t_y, &t_mean, &t_stdv, &t_pixels); SetIntensity(t_x,t_y,t_mean); SetStdv(t_x,t_y,t_stdv); SetPixels(t_x,t_y,t_pixels); ++iCell; } } //Advance to the Masked data readMore=true; while(readMore) { if (!instr.getline(pszHeader, MAXLINELENGTH)) //end of file return false; if (strncmp(pszHeader,"[MASKS]",7)==0) readMore=false; } //Read number of masked cells instr.getline(pszHeader, MAXLINELENGTH); int nMasked=0; sscanf(pszHeader, "NumberCells=%d", &nMasked); m_HeaderData.SetMasked(nMasked); instr.getline(pszHeader, MAXLINELENGTH);//skip over the header //Read the masked data if (m_bReadMaskedCells) { readMore=true; while (readMore) { if (!instr.getline(pszHeader, MAXLINELENGTH)) //end of file 
readMore=false; else if (strlen(pszHeader) < MIN_CELLSTR )// blank line at end of data readMore=false; else { int x, y, iCellEntry; sscanf(pszHeader, "%d\t%d", &x, &y); iCellEntry = y * m_HeaderData.GetCols() + x; m_MaskedCells.insert(std::make_pair(iCellEntry, true)); } } } else m_HeaderData.SetMasked(0); //Advance to the outlier data readMore=true; while(readMore) { if (!instr.getline(pszHeader, MAXLINELENGTH)) //end of file return false; if (strncmp(pszHeader,"[OUTLIERS]",10)==0) readMore=false; } //Read number of outlier cells instr.getline(pszHeader, MAXLINELENGTH); int nOutliers=0; sscanf(pszHeader, "NumberCells=%d", &nOutliers); m_HeaderData.SetOutliers(nOutliers); instr.getline(pszHeader, MAXLINELENGTH);//skip over the header //Read the outlier data if (m_bReadOutliers) { readMore=true; while (readMore) { if (!instr.getline(pszHeader, MAXLINELENGTH)) //end of file readMore=false; else if (strlen(pszHeader) < MIN_CELLSTR )// blank line at end of data readMore=false; else { int x, y, iCellEntry; sscanf(pszHeader, "%d\t%d", &x, &y); iCellEntry = y * m_HeaderData.GetCols() + x; m_Outliers.insert(std::make_pair(iCellEntry, true)); } } } else m_HeaderData.SetOutliers(0); retVal = true; } instr.close(); return retVal; } /////////////////////////////////////////////////////////////////////////////// /// public Clear /// \brief Reset content and deallocate memory /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::Clear() { // throw away our memmap on the way out. 
Munmap(); m_HeaderData.Clear(); m_MaskedCells.clear(); m_Outliers.clear(); delete [] m_pEntries; m_pEntries=NULL; delete [] m_pTransciptomeEntries; m_pTransciptomeEntries = NULL; delete [] m_pMeanIntensities; m_pMeanIntensities = NULL; } /////////////////////////////////////////////////////////////////////////////// /// public Munmap /// \brief Unmap file view /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::Munmap() { // If there isnt a mapping we dont have to munmap... if (m_lpFileMap == NULL) { // ...but we should get rid of memory which might have been alloced. delete [] m_pEntries; m_pEntries=NULL; return; } // zero out the pointers which are now invalid... m_lpData = NULL; m_pTransciptomeEntries = NULL; m_pEntries = NULL; m_pMeanIntensities = NULL; // free the map #ifdef _MSC_VER if (m_lpFileMap != NULL) { UnmapViewOfFile(m_lpFileMap); CloseHandle(m_hFileMap); m_hFileMap = NULL; CloseHandle(m_hFile); m_hFile = INVALID_HANDLE_VALUE; } #else if ((m_lpFileMap != MAP_FAILED) && (m_lpFileMap != NULL)) { //printf("=== unmap %p\n",m_lpFileMap); // jhg munmap(m_lpFileMap, m_MapLen); // m_lpFileMap = MAP_FAILED; // why? m_MapLen = 0; // should be closed already. 
if (m_File != NULL) { fclose(m_File); m_File = NULL; } } #endif m_lpFileMap=NULL; } /////////////////////////////////////////////////////////////////////////////// /// public Close /// \brief Unmap file view and reset data pointer /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::Close() { Clear(); } /////////////////////////////////////////////////////////////////////////////// /// public ReadHeader /// \brief Determine CEL file format and call appropriate function to read header section only /// /// @return bool true if success, false if fail /// /// \remark File name has to be preset using SetFileName() /// \see Read, Open /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::ReadHeader() { // Read the header, close if failed. if (Open(true) == false) { Close(); return false; } return true; } /////////////////////////////////////////////////////////////////////////////// /// public Read /// \brief Determine CEL file format and call appropriate function to read file /// /// @param bIncludeMaskAndOutliers bool Flag for including masked cells and outliers /// @return bool true if success, false if fail /// /// \remark File name has to be preset using SetFileName() /// \see ReadEx, Open /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::Read(bool bIncludeMaskAndOutliers) { m_bReadMaskedCells = bIncludeMaskAndOutliers; m_bReadOutliers = bIncludeMaskAndOutliers; // Open the file if (Open() == false) { Close(); return false; } return true; } /////////////////////////////////////////////////////////////////////////////// /// public ReadEx /// /// \brief Determine CEL file format and call appropriate function to read file using the specified file name /// /// @param filename const char * [=0] File name of CEL file to be read /// @param nState int [=CEL_ALL] Reading state /// @return bool true if success, false 
if fail /// /// \a nState can be one or combination of the following values:\n\n /// CEL_ALL Read all information in file (default)\n /// CEL_DATA Read header and intensities only\n /// CEL_OUTLIER Read header, intensities and outliers\n /// CEL_MASK Read header, intensities and masked cells\n\n /// File name previously set by using SetFileName() will be replaced by the current input. /// /// \see Read, Open, GetReadingState /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::ReadEx(const char *filename, int nState) { m_nReadState = nState; // Open the file SetFileName(filename); // Determine if outliers should be read if ((nState & (CEL_ALL | CEL_OUTLIER))) this->m_bReadOutliers = true; else this->m_bReadOutliers = false; // Determine if masks should be read if ((nState & (CEL_ALL | CEL_MASK))) this->m_bReadMaskedCells = true; else this->m_bReadMaskedCells = false; if (Open() == false) { Close(); return false; } return true; } /// Length of buffer for GetHeaderKey. 
#define SVALUE_LENGTH 50 /////////////////////////////////////////////////////////////////////////////// /// public GetHeaderKey /// \brief Retrieve header value in string by specifying header key /// /// @param key const char * Header section key /// @return std::string Header section value /////////////////////////////////////////////////////////////////////////////// std::string CCELFileData::GetHeaderKey(const char* key) { assert(key != NULL); // tmp buff for snprintf char buf[SVALUE_LENGTH+1]; std::string strKey = key; std::transform(strKey.begin(), strKey.end(), strKey.begin(), toupper); if (strKey == "HEADER") { return GetHeaderString(); } else if (strKey == "VERSION") { snprintf(buf,SVALUE_LENGTH, "%d", GetVersion()); return std::string(buf); } else if (strKey == "COLS") { snprintf(buf,SVALUE_LENGTH, "%d", GetCols()); return std::string(buf); } else if (strKey == "ROWS") { snprintf(buf,SVALUE_LENGTH, "%d", GetRows()); return std::string(buf); } else if (strKey == "TOTALX") { snprintf(buf,SVALUE_LENGTH, "%d", GetCols()); return std::string(buf); } else if (strKey == "TOTALY") { snprintf(buf,SVALUE_LENGTH, "%d", GetRows()); return std::string(buf); } else if (strKey == "GRIDCORNERUL") { snprintf(buf,SVALUE_LENGTH, "(%d, %d)", GetGridCorners().upperleft.x, GetGridCorners().upperleft.y); return std::string(buf); } else if (strKey == "GRIDCORNERUR") { snprintf(buf,SVALUE_LENGTH, "(%d, %d)", GetGridCorners().upperright.x, GetGridCorners().upperright.y); return std::string(buf); } else if (strKey == "GRIDCORNERLL") { snprintf(buf,SVALUE_LENGTH, "(%d, %d)", GetGridCorners().lowerleft.x, GetGridCorners().lowerleft.y); return std::string(buf); } else if (strKey == "GRIDCORNERLR") { snprintf(buf,SVALUE_LENGTH, "(%d, %d)", GetGridCorners().lowerright.x, GetGridCorners().lowerright.y); return std::string(buf); } else if (strKey == "OFFSETX") return "0"; else if (strKey == "OFFSETY") return "0"; else if (strKey == "AXIS-INVERTX") return "0"; else if (strKey == 
"AXISINVERTY") return "0"; else if (strKey == "SWAPXY") return "0"; else if (strKey == "DATHEADER") return GetDatHeader(); else if (strKey == "ALGORITHM") return GetAlg(); else if (strKey == "ALGORITHMPARAMETERS") return GetParams(); else if (strKey == "NUMBERCELLS") { snprintf(buf,SVALUE_LENGTH, "%d", GetNumCells()); return std::string(buf); } else if (strKey == "NUMBERMASKEDCELLS") { snprintf(buf,SVALUE_LENGTH, "%d", GetNumMasked()); return std::string(buf); } else if (strKey == "NUMBEROUTLIERCELLS") { snprintf(buf,SVALUE_LENGTH, "%d", GetNumOutliers()); return std::string(buf); } // unknown header... return ""; } #undef SVALUE_LENGTH // dont need this any more. /////////////////////////////////////////////////////////////////////////////// /// public GetEntry /// \brief Retrieve entry of specified cell for xda and text formats /// /// @param x int X coordinates /// @param y int Y coordinates /// @param entry CELFileEntryType& Cell entry /////////////////////////////////////////////////////////////////////////////// void CCELFileData::GetEntry(int x, int y, CELFileEntryType &entry) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); GetEntry(XYToIndex(x,y), entry); } /////////////////////////////////////////////////////////////////////////////// /// public GetEntry /// \brief Retrieve entry of specified cell for xda and text formats /// /// @param index int Cell index /// @param entry CELFileEntryType& Cell entry /////////////////////////////////////////////////////////////////////////////// void CCELFileData::GetEntry(int index, CELFileEntryType &entry) { assert((index >= 0) && (index < m_HeaderData.GetCells())); entry.Intensity = GetIntensity(index); entry.Stdv = GetStdv(index); entry.Pixels = GetPixels(index); } /////////////////////////////////////////////////////////////////////////////// /// public GetTranscriptomeEntry /// \brief Retrieve entry of specified cell for transcriptome bcel format /// /// 
@param x int X coordinates /// @param y int Y coordinates /// @param entry CELFileTranscriptomeEntryType& Cell entry /////////////////////////////////////////////////////////////////////////////// void CCELFileData::GetTranscriptomeEntry(int x, int y, CELFileTranscriptomeEntryType &entry) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); GetTranscriptomeEntry(XYToIndex(x,y), entry); } /////////////////////////////////////////////////////////////////////////////// /// public GetTranscriptomeEntry /// \brief Retrieve entry of specified cell for transcriptome bcel format /// /// @param index int Cell index /// @param entry CELFileTranscriptomeEntryType& Cell entry /////////////////////////////////////////////////////////////////////////////// void CCELFileData::GetTranscriptomeEntry(int index, CELFileTranscriptomeEntryType &entry) { assert((index >= 0) && (index < m_HeaderData.GetCells())); entry.Intensity = (uint16_t) RoundNumber(GetIntensity(index)); entry.Stdv = (uint16_t) RoundNumber(GetStdv(index)); entry.Pixels = (uint8_t) GetPixels(index); } /////////////////////////////////////////////////////////////////////////////// /// public GetIntensity /// \brief Retrieve intensity of specified cell /// /// @param x int X coordinates /// @param y int Y coordinates /// @return float intensity /////////////////////////////////////////////////////////////////////////////// float CCELFileData::GetIntensity(int x, int y) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); return GetIntensity(XYToIndex(x,y)); } /////////////////////////////////////////////////////////////////////////////// /// public GetIntensity /// \brief Retrieve intensity of specified cell /// /// @param index int Cell index /// @return float intensity /////////////////////////////////////////////////////////////////////////////// float CCELFileData::GetIntensity(int index) { float fIntensity = 0; 
assert((index >= 0) && (index < m_HeaderData.GetCells())); if (m_FileFormat == TEXT_CEL) { //fIntensity = m_pEntries[index].Mean; fIntensity=MmGetFloat_I(&m_pEntries[index].Intensity); } else if (m_FileFormat == XDA_BCEL) { //fIntensity = GetFloat(&(m_pEntries[index].Intensity), m_FileFormat); fIntensity=MmGetFloat_I(&(m_pEntries[index].Intensity)); } else if (m_FileFormat == TRANSCRIPTOME_BCEL) { //fIntensity = (float) (GetUShort(&(m_pTransciptomeEntries[index].Intensity), m_FileFormat)); fIntensity=MmGetUInt16_N(&(m_pTransciptomeEntries[index].Intensity)); } else if (m_FileFormat == COMPACT_BCEL) { //fIntensity = (float) (GetUShort(m_pMeanIntensities + index, m_FileFormat)); fIntensity=MmGetUInt16_I(&m_pMeanIntensities[index]); } else { assert(0); } return fIntensity; } int CCELFileData::GetIntensities(int index,std::vector& intensities) { int idx_start=index; int idx_end=idx_start+intensities.size(); // the start and end indexes must be valid... assert((idx_start >= 0) && (idx_end <= m_HeaderData.GetCells())); // determine the format once, then copy a vectors worth of data. 
if (m_FileFormat == TEXT_CEL) { for (int idx=idx_start;idx= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); return GetStdv(XYToIndex(x,y)); } /////////////////////////////////////////////////////////////////////////////// /// public GetStdv /// \brief Retrieve standard deviation of specified cell /// /// @param index int Cell index /// @return float Standard deviation intensity /////////////////////////////////////////////////////////////////////////////// float CCELFileData::GetStdv(int index) { assert((index >= 0) && (index < m_HeaderData.GetCells())); float fStdev = 0; if (m_FileFormat == TEXT_CEL) //fStdev = m_pEntries[index].Stdv; fStdev=MmGetFloat_I(&m_pEntries[index].Stdv); else if (m_FileFormat == XDA_BCEL) //fStdev = GetFloat(&(m_pEntries[index].Stdv), m_FileFormat); fStdev=MmGetFloat_I(&(m_pEntries[index].Stdv)); else if (m_FileFormat == TRANSCRIPTOME_BCEL) //fStdev = (float) (GetUShort(&(m_pTransciptomeEntries[index].Stdv), m_FileFormat)); fStdev=MmGetUInt16_N(&(m_pTransciptomeEntries[index].Stdv)); else if (m_FileFormat == COMPACT_BCEL) fStdev=0; else assert(0); return fStdev; } /////////////////////////////////////////////////////////////////////////////// /// public GetPixels /// \brief Retrieve number of pixels of specified cell /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @return short Number of pixels /////////////////////////////////////////////////////////////////////////////// short CCELFileData::GetPixels(int x, int y) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); return GetPixels(XYToIndex(x,y)); } /////////////////////////////////////////////////////////////////////////////// /// public GetPixels /// \brief Retrieve number of pixels of specified cell /// /// @param index int Cell index /// @return short Number of pixels /////////////////////////////////////////////////////////////////////////////// short 
CCELFileData::GetPixels(int index) { assert((index >= 0) && (index < m_HeaderData.GetCells())); short sPixels = 0; if (m_FileFormat == TEXT_CEL) //sPixels = m_pEntries[index].Pixels; sPixels=MmGetInt16_I(&m_pEntries[index].Pixels); else if (m_FileFormat == XDA_BCEL) //sPixels = GetShort(&(m_pEntries[index].Pixels), m_FileFormat); sPixels=MmGetInt16_I(&m_pEntries[index].Pixels); else if (m_FileFormat == TRANSCRIPTOME_BCEL) //sPixels = (short) m_pTransciptomeEntries[index].Pixels; sPixels=MmGetUInt8(&m_pTransciptomeEntries[index].Pixels); else if (m_FileFormat == COMPACT_BCEL) sPixels=0; else assert(0); return sPixels; } /////////////////////////////////////////////////////////////////////////////// /// public IsMasked /// \brief Determine if specified cell is masked /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @return bool true if masked cell; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsMasked(int x, int y) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); return IsMasked(XYToIndex(x,y)); } /////////////////////////////////////////////////////////////////////////////// /// public IsMasked /// \brief Determine if specified cell is masked /// /// @param index int Cell index /// @return bool true if masked cell; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsMasked(int index) { assert((index >= 0) && (index < m_HeaderData.GetCells())); bool bReturn = false; std::map::iterator pos = m_MaskedCells.find(index); if (pos != m_MaskedCells.end()) bReturn = true; return bReturn; } /////////////////////////////////////////////////////////////////////////////// /// public IsOutlier /// \brief Determine if specified cell is outlier /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @return bool true if outlier; 
false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsOutlier(int x, int y) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); return IsOutlier(XYToIndex(x,y)); } /////////////////////////////////////////////////////////////////////////////// /// public IsOutlier /// \brief Determine if specified cell is outlier /// /// @param index int Cell index /// @return bool true if outlier; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsOutlier(int index) { assert((index >= 0) && (index < m_HeaderData.GetCells())); bool bReturn = false; std::map::iterator pos = m_Outliers.find(index); if (pos != m_Outliers.end()) bReturn = true; return bReturn; } /////////////////////////////////////////////////////////////////////////////// /// public IsXDACompatibleFile /// \brief Determine if CEL file is in xda format /// /// @return bool true if xda; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsXDACompatibleFile() { std::string tmp_FileName; tmp_FileName=ResolveName(); #ifdef CELFILE_USE_STDSTREAM // Open the file. std::ifstream instr(tmp_FileName.c_str(), std::ios::in | std::ios::binary); if (!instr) return 0; #endif #ifdef CELFILE_USE_ZLIB gzFile instr; instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { return 0; } #endif // Read the magic number from the file. uint32_t magic=0; ReadUInt32_I(instr, magic); bool bXDAFile = ((magic == CELL_FILE_MAGIC_NUMBER) ? 
true : false); #ifdef CELFILE_USE_STDSTREAM instr.close(); #endif #ifdef CELFILE_USE_ZLIB gzclose(instr); #endif return bXDAFile; } /////////////////////////////////////////////////////////////////////////////// /// public IsVersion3CompatibleFile /// \brief Determine if CEL file is in text (version 3) format /// /// @return bool true if version 3; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsVersion3CompatibleFile() { // Open the file. std::ifstream instr(m_FileName.c_str(), std::ios::in); if (!instr) return false; bool status = false; const char *Version3Line = "[CEL]"; const int MAXLINELENGTH = 16; char pszHeader[MAXLINELENGTH]; // Extract the first line which should be [CEL] for a version 3 file instr.getline(pszHeader, MAXLINELENGTH); if (strncmp(pszHeader,Version3Line,strlen(Version3Line))==0) status = true; instr.close(); return status; } /////////////////////////////////////////////////////////////////////////////// /// public IsTranscriptomeBcelFile /// \brief Determine if CEL file is in transcriptome binary format /// /// @return bool true if transcriptome binary; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsTranscriptomeBcelFile() { std::string tmp_FileName; tmp_FileName=ResolveName(); // Open the file. #ifdef CELFILE_USE_STDSTREAM std::ifstream instr(tmp_FileName.c_str(), std::ios::in | std::ios::binary); if (!instr) { return 0; } #endif #ifdef CELFILE_USE_ZLIB gzFile instr; instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { return 0; } #endif // Read the header marker from the file. 
char szMarker[BCEL_HEADER_LEN]; ReadFixedCString(instr, szMarker, BCEL_HEADER_LEN); bool bTranscriptomeFile = false; if (strncmp(szMarker, BCEL_HEADER_BYTES, BCEL_HEADER_LEN) == 0) bTranscriptomeFile = true; #ifdef CELFILE_USE_STDSTREAM instr.close(); #endif #ifdef CELFILE_USE_ZLIB gzclose(instr); #endif return bTranscriptomeFile; } /////////////////////////////////////////////////////////////////////////////// /// public IsCompactCelFile /// \brief Determine if CEL file is in compact binary format /// /// @return bool true if compact; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsCompactCelFile() { std::string tmp_FileName; tmp_FileName=ResolveName(); // Open the file. #ifdef CELFILE_USE_STDSTREAM std::ifstream instr(tmp_FileName.c_str(), std::ios::in | std::ios::binary); if (!instr) { return 0; } #endif #ifdef CELFILE_USE_ZLIB gzFile instr; tmp_FileName=ResolveName(); instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { return 0; } #endif // Read the header marker from the file. char szMarker[CCEL_HEADER_LEN]; ReadFixedCString(instr, szMarker, CCEL_HEADER_LEN); bool bCompactCelFile = false; if (strncmp(szMarker, CCEL_HEADER_BYTES, CCEL_HEADER_LEN) == 0) bCompactCelFile = true; #ifdef CELFILE_USE_STDSTREAM instr.close(); #endif #ifdef CELFILE_USE_ZLIB gzclose(instr); #endif return bCompactCelFile; } /////////////////////////////////////////////////////////////////////////////// /// public IsUnsupportedCompactCelFile /// \brief Determine if CEL file is in unsupported compact binary format /// /// @return bool true if unsupported compact; false otherwise /////////////////////////////////////////////////////////////////////////////// bool CCELFileData::IsUnsupportedCompactCelFile() { std::string tmp_FileName; tmp_FileName=ResolveName(); #ifdef CELFILE_USE_STDSTREAM // Open the file. 
std::ifstream instr(tmp_FileName.c_str(), std::ios::in | std::ios::binary); if (!instr) return 0; #endif #ifdef CELFILE_USE_ZLIB gzFile instr; tmp_FileName=ResolveName(); instr=gzopen(tmp_FileName.c_str(),"rb"); if (instr==NULL) { return 0; } #endif // Read the header marker from the file. char szMarker[CCEL_HEADER_LEN]; ReadFixedCString(instr, szMarker, CCEL_HEADER_LEN); bool bCompactCelFile = false; if (strncmp(szMarker, OLD_CCEL_HEADER_BYTES, CCEL_HEADER_LEN) == 0) bCompactCelFile = true; #ifdef CELFILE_USE_STDSTREAM instr.close(); #endif #ifdef CELFILE_USE_ZLIB gzclose(instr); #endif return bCompactCelFile; } /////////////////////////////////////////////////////////////////////////////// /// private DetermineFileFormat /// \brief Determine the CEL file format by checking teh file against each of the format type /// /// @return void /// /// \see IsXDACompatibleFile, IsTranscriptomeBcelFile, IsCompactCelFile /////////////////////////////////////////////////////////////////////////////// void CCELFileData::DetermineFileFormat() { if (IsXDACompatibleFile()) m_FileFormat = XDA_BCEL; else if (IsTranscriptomeBcelFile()) m_FileFormat = TRANSCRIPTOME_BCEL; else if (IsCompactCelFile()) m_FileFormat = COMPACT_BCEL; else if (IsUnsupportedCompactCelFile()) m_FileFormat = 0; else m_FileFormat = TEXT_CEL; } /////////////////////////////////////////////////////////////////////////////// /// private RoundNumber /// /// \brief Round floating point to integer /// /// @param f float Floating point /// @return unsigned short Rounded integer /// /// RoundNumber(3.3) = 3\n /// RoundNumber(3.5) = 4 /////////////////////////////////////////////////////////////////////////////// unsigned short CCELFileData::RoundNumber(float f) { assert(f >= 0); unsigned short us = (unsigned short) f; if((f - (float) us) >= 0.5) us++; return us; } /////////////////////////////////////////////////////////////////////////////// /// public constructor CCELFileData /// \brief Default constructor /// /// 
@return void /////////////////////////////////////////////////////////////////////////////// CCELFileData::CCELFileData() { m_strError = ""; m_FileName = ""; m_pEntries = NULL; m_pTransciptomeEntries = NULL; m_pMeanIntensities = NULL; m_FileFormat = XDA_BCEL; m_lpFileMap = NULL; m_lpData = NULL; m_bReadMaskedCells = true; m_bReadOutliers = true; m_nReadState = CEL_ALL; } /////////////////////////////////////////////////////////////////////////////// /// public destructor ~CCELFileData /// \brief Destructor /// /// @return void /////////////////////////////////////////////////////////////////////////////// CCELFileData::~CCELFileData() { Clear(); } /////////////////////////////////////////////////////////////////////////////// /// public SetError /// \brief Set error string /// /// @param str const char * Error string /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetError(const std::string& str) { m_strError = str; } void CCELFileData::SetError(const char* str) { m_strError=str; } /////////////////////////////////////////////////////////////////////////////// /// public SetGridCorners /// \brief Call CCELFileHeaderData::SetGridCorners() to set grid coordinates /// /// @param grid GridCoordinatesType Algorithm name /// @return void /// \see CCELFileHeaderData::SetGridCorners /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetGridCorners(GridCoordinatesType grid) { m_HeaderData.SetGridCorners(grid); } /////////////////////////////////////////////////////////////////////////////// /// public SetAlgorithmName /// \brief Call CCELFileHeaderData::SetAlg() to set algorithm name /// /// @param str const char * Algorithm name /// @return void /// \see CCELFileHeaderData::SetAlg /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetAlgorithmName(const char *str) { m_HeaderData.SetAlg(str); } 
/////////////////////////////////////////////////////////////////////////////// /// public AddAlgorithmParameter /// \brief Call CCELFileHeaderData::AddAlgorithmParameter() to add algorithm parameter /// /// @param tag const char * Algorithm parameter tag /// @param value const char * Algorithm paramter value /// @return void /// \see CCELFileHeaderData::AddAlgorithmParameter /////////////////////////////////////////////////////////////////////////////// void CCELFileData::AddAlgorithmParameter(const char *tag, const char *value) { m_HeaderData.AddAlgorithmParameter(tag, value); } /////////////////////////////////////////////////////////////////////////////// /// public SetAlgorithmParameter /// \brief Call CCELFileHeaderData::SetAlgorithmParameter() to add algorithm parameter /// /// @param tag const char * Algorithm parameter tag /// @param value const char * Algorithm paramter value /// @return void /// \see CCELFileHeaderData::SetAlgorithmParameter /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetAlgorithmParameter(const char *tag, const char *value) { m_HeaderData.SetAlgorithmParameter(tag, value); } /////////////////////////////////////////////////////////////////////////////// /// public SetAddAlgorithmParameter /// \brief Call CCELFileHeaderData::SetAddAlgorithmParameter() to add algorithm parameter /// /// @param tag const char * Algorithm parameter tag /// @param value const char * Algorithm paramter value /// @return void /// \see CCELFileHeaderData::SetAddAlgorithmParameter /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetAddAlgorithmParameter(const char *tag, const char *value) { m_HeaderData.SetAddAlgorithmParameter(tag, value); } /////////////////////////////////////////////////////////////////////////////// /// public SetFileFormat /// /// \brief Set CEL file format to determine the appropriate data storage in cell /// /// @param i int File 
format /// @return void /// /// \a i can be one of the following values:\n /// TEXT_CEL\n /// XDA_BCEL\n /// TRANSCRIPTOME_BCEL\n /// COMPACT_BCEL\n /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetFileFormat(int i) { assert((i >= TEXT_CEL) && (i <= COMPACT_BCEL)); m_FileFormat = i; } /////////////////////////////////////////////////////////////////////////////// /// public SetDimensions /// \brief Set array dimensions in header section and allocate memory for intensity data /// /// @param rows int Number of rows in array /// @param cols int Number of columns in array /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetDimensions(int rows, int cols) { m_HeaderData.SetRows(rows); m_HeaderData.SetCols(cols); m_HeaderData.SetCells(rows * cols); GridCoordinatesType grid; grid.upperleft.x = 1; grid.upperleft.y = 1; grid.upperright.x = cols; grid.upperright.y = 1; grid.lowerleft.x = 1; grid.lowerleft.y = rows; grid.lowerright.x = cols; grid.lowerright.y = rows; m_HeaderData.SetGridCorners(grid); /// @todo UnMap // delete [] m_lpData; // m_lpData=NULL; delete [] m_pEntries; m_pEntries=NULL; delete [] m_pTransciptomeEntries; m_pTransciptomeEntries=NULL; if (m_FileFormat == TRANSCRIPTOME_BCEL) m_pTransciptomeEntries = new CELFileTranscriptomeEntryType[rows*cols]; else if ((m_FileFormat == XDA_BCEL) || (m_FileFormat == TEXT_CEL)) m_pEntries = new CELFileEntryType[rows*cols]; else if (m_FileFormat == COMPACT_BCEL) m_pMeanIntensities = new unsigned short[rows*cols]; } /////////////////////////////////////////////////////////////////////////////// /// public SetChipType /// \brief Set chip type and regenerate header string /// /// @param str const char * Chip type /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetChipType(const char *str) { assert(str != NULL); m_HeaderData.SetChipType(str); 
std::string strHeader = m_HeaderData.GetHeader(); // Set the chip type in the header string. int endIndex = (int) strHeader.find(".1sq"); int startIndex = (int) strHeader.rfind(" ", endIndex); int length = (int) strHeader.length(); std::string start = strHeader.substr(0, startIndex + 1); std::string end = strHeader.substr(endIndex, length - endIndex); std::string newHeader = start + m_HeaderData.GetChipType() + end; m_HeaderData.SetHeader(newHeader.c_str()); } /////////////////////////////////////////////////////////////////////////////// /// public SetMargin /// \brief Set cell margin /// /// @param margin int Cell margin /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetMargin(int margin) { m_HeaderData.SetMargin(margin); } /////////////////////////////////////////////////////////////////////////////// /// public AllocateEntries /// \brief Allocate memory for cell entries /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::AllocateEntries() { delete [] m_pEntries; m_pEntries = new CELFileEntryType[m_HeaderData.GetCells()]; } /////////////////////////////////////////////////////////////////////////////// /// public overloaded SetCellEntry /// \brief Set cell file entry /// /// @param x int X coordinates /// @param y int Y coordinates /// @param pEntry CELFileEntryType * Pointer to CEL File entry for xda and text formats /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetCellEntry(int x, int y, CELFileEntryType *pEntry) { SetCellEntry(XYToIndex(x,y), pEntry); } /////////////////////////////////////////////////////////////////////////////// /// public overloaded SetCellEntry /// \brief Set cell file entry /// /// @param index int Cell index /// @param pEntry CELFileEntryType * Pointer to CEL File entry for xda and text formats /// @return void 
/////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetCellEntry(int index, CELFileEntryType *pEntry) { m_pEntries[index] = *pEntry; } /////////////////////////////////////////////////////////////////////////////// /// public AllocateEntries /// \brief Allocate memory for cell entries /// /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::AllocateTranscriptomeEntries() { delete [] m_pTransciptomeEntries; m_pTransciptomeEntries = new CELFileTranscriptomeEntryType[m_HeaderData.GetCells()]; } /////////////////////////////////////////////////////////////////////////////// /// public overloaded SetCellEntry /// \brief Set cell file entry /// /// @param x int X coordinates /// @param y int Y coordinates /// @param pEntry CELFileEntryType * Pointer to CEL File entry for transcriptome bcel format /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetTranscriptomeCellEntry(int x, int y, CELFileTranscriptomeEntryType *pEntry) { SetTranscriptomeCellEntry(XYToIndex(x,y), pEntry); } /////////////////////////////////////////////////////////////////////////////// /// public overloaded SetCellEntry /// \brief Set cell file entry /// /// @param index int Cell index /// @param pEntry CELFileEntryType * Pointer to CEL File entry for transcriptome bcel format /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetTranscriptomeCellEntry(int index, CELFileTranscriptomeEntryType *pEntry) { m_pTransciptomeEntries[index] = *pEntry; } /////////////////////////////////////////////////////////////////////////////// /// public SetIntensity /// \brief Set intensity of specified cell /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @param intensity float Mean intensity /// @return void 
/////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetIntensity(int x, int y, float intensity) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); SetIntensity(XYToIndex(x,y), intensity); } /////////////////////////////////////////////////////////////////////////////// /// public SetIntensity /// \brief Set intensity of specified cell /// /// @param index int Cell index /// @param intensity float Mean intensity /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetIntensity(int index, float intensity) { assert((index >= 0) && (index < m_HeaderData.GetCells())); if (m_FileFormat == TEXT_CEL) { MmSetFloat_I(&m_pEntries[index].Intensity,intensity); } else if (m_FileFormat == XDA_BCEL) { //m_pEntries[index].Mean = GetFloat(&intensity, m_FileFormat); MmSetFloat_I(&m_pEntries[index].Intensity,intensity); } else if (m_FileFormat == TRANSCRIPTOME_BCEL) { unsigned short s = RoundNumber(intensity); MmSetUInt16_N(&m_pTransciptomeEntries[index].Intensity,s); } else if (m_FileFormat == COMPACT_BCEL) { unsigned short s = RoundNumber(intensity); //m_pMeanIntensities[index] = GetUShort(&s, m_FileFormat); MmSetUInt16_I(&m_pMeanIntensities[index],s); } else { assert(0); } } /////////////////////////////////////////////////////////////////////////////// /// public SetStdv /// \brief Set standard deviation intensity of specified cell /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @param stdev float Standard deviation intensity /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetStdv(int x, int y, float stdev) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); SetStdv(XYToIndex(x,y), stdev); } 
/////////////////////////////////////////////////////////////////////////////// /// public SetStdv /// \brief Set standard deviation intensity of specified cell /// /// @param index int Cell index /// @param stdev float Standard deviation intensity /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetStdv(int index, float stdev) { assert((index >= 0) && (index < m_HeaderData.GetCells())); if (m_FileFormat == TRANSCRIPTOME_BCEL) { unsigned short s = RoundNumber(stdev); //m_pTransciptomeEntries[index].Stdv = GetUShort(&s, m_FileFormat); MmSetUInt16_N(&m_pTransciptomeEntries[index].Stdv,s); } else if ((m_FileFormat == TEXT_CEL) || (m_FileFormat == XDA_BCEL)) //m_pEntries[index].Stdv = GetFloat(&stdev, m_FileFormat); MmSetFloat_I(&m_pEntries[index].Stdv,stdev); else if (m_FileFormat != COMPACT_BCEL) assert(0); } /////////////////////////////////////////////////////////////////////////////// /// public SetPixels /// \brief Set number of pixels of specified cell /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @param pixels short Number of pixels /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetPixels(int x, int y, short pixels) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); SetPixels(XYToIndex(x,y), pixels); } /////////////////////////////////////////////////////////////////////////////// /// public SetPixels /// \brief Set number of pixels of specified cell /// /// @param index int Cell index /// @param pixels short Number of pixels /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetPixels(int index, short pixels) { assert((index >= 0) && (index < m_HeaderData.GetCells())); if (m_FileFormat == TRANSCRIPTOME_BCEL) //m_pTransciptomeEntries[index].Pixels = (unsigned char) pixels; 
MmSetUInt8(&m_pTransciptomeEntries[index].Pixels,(uint8_t)pixels); else if ((m_FileFormat == TEXT_CEL) || (m_FileFormat == XDA_BCEL)) //m_pEntries[index].Pixels = GetShort(&pixels, m_FileFormat); MmSetUInt16_I((uint16_t*)&m_pEntries[index].Pixels,(uint16_t)pixels); else if (m_FileFormat != COMPACT_BCEL) assert(0); } /////////////////////////////////////////////////////////////////////////////// /// public SetMask /// \brief Set if specified cell is masked /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @param masked bool true if masked; false otherwise /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetMask(int x, int y, bool masked) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); SetMask(XYToIndex(x,y), masked); } /////////////////////////////////////////////////////////////////////////////// /// public SetMask /// \brief Set if specified cell is masked /// /// @param index int Cell index /// @param masked bool true if masked; false otherwise /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetMask(int index, bool masked) { assert((index >= 0) && (index < m_HeaderData.GetCells())); if (masked) { m_MaskedCells.insert(std::make_pair(index, true)); m_HeaderData.IncrementMasked(); } else { std::map::iterator pos = m_MaskedCells.find(index); if (pos != m_MaskedCells.end()) { m_MaskedCells.erase(pos); m_HeaderData.DecrementMasked(); } } } /////////////////////////////////////////////////////////////////////////////// /// public SetOutlier /// \brief Set if specified cell is outlier /// /// @param x int X coordinate of cell /// @param y int Y coordinate of cell /// @param outlier bool true if outlier; false otherwise /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetOutlier(int x, 
int y, bool outlier) { assert((x >= 0) && (x <= m_HeaderData.GetCols())); assert((y >= 0) && (y <= m_HeaderData.GetRows())); SetOutlier(XYToIndex(x,y), outlier); } /////////////////////////////////////////////////////////////////////////////// /// public SetOutlier /// \brief Set if specified cell is outlier /// /// @param index int Cell index /// @param outlier bool true if outlier; false otherwise /// @return void /////////////////////////////////////////////////////////////////////////////// void CCELFileData::SetOutlier(int index, bool outlier) { assert((index >= 0) && (index < m_HeaderData.GetCells())); if (outlier) { m_Outliers.insert(std::make_pair(index, true)); m_HeaderData.IncrementOutliers(); } else { std::map::iterator pos = m_Outliers.find(index); if (pos != m_Outliers.end()) { m_Outliers.erase(pos); m_HeaderData.DecrementOutliers(); } } } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/CELFileData.h0000644000175200017520000012341714516003651021125 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CELFILEDATA_H_ #define _CELFILEDATA_H_ // file io options // enable reading of ".gz" celfiles. // #define CELFILE_USE_ZLIB 1 #ifndef CELFILE_USE_ZLIB #define CELFILE_USE_STDSTREAM 1 #endif ////////// // uint32_t and friends #include "file/FileIO.h" #include "file/GridCoordinates.h" #include "file/TagValuePair.h" // #include "portability/affy-base-types.h" // #include #include #include // #ifdef CELFILE_USE_ZLIB #ifndef FILEIO_WITH_ZLIB #error CELFILE_USE_ZLIB requires FILEIO_WITH_ZLIB #endif #endif #if defined (__CYGWIN__) #include #include #include #include /// Structure alignment requirement for g++ #define STRUCT_ALIGNMENT __attribute__ ((packed)) #define PAGE_SHIFT 12 /// Set page size value for memory mapping used under CYGWIN #define PAGE_SIZE (1UL << PAGE_SHIFT) /// Set page mask value for memory mapping used under CYGWIN #define PAGE_MASK (~(PAGE_SIZE-1)) #elif defined (_MSC_VER) #include /// Structure alignment requirement for g++ /// @remark Structure alignment for Visual C++ is included in #pragma #define STRUCT_ALIGNMENT #elif defined (__APPLE__) #include #include #include #include #include /// Structure alignment requirement for g++ /// @remark Structure alignment for Mac OS X is included in #pragma #define STRUCT_ALIGNMENT #else // UNIX #include #include #include #include #include /// Structure alignment requirement for g++ #define STRUCT_ALIGNMENT __attribute__ ((packed)) #endif namespace affxcel { #ifdef _MSC_VER #pragma pack(push, 1) #endif #ifdef __APPLE__ #pragma options align=packed #endif /////////////////////////////////////////////////////////////////////////////// /// typedef as CELFileEntryType /// @brief Structure of CEL file entries for text and xda 
format /////////////////////////////////////////////////////////////////////////////// typedef struct _CELFileEntryType { /// Intensity float Intensity /* \cond */ STRUCT_ALIGNMENT /*! \endcond */ ; /// Standard deviation intensity float Stdv /* \cond */ STRUCT_ALIGNMENT /*! \endcond */ ; /// Number of pixels short Pixels /* \cond */ STRUCT_ALIGNMENT /*! \endcond */ ; } CELFileEntryType; /////////////////////////////////////////////////////////////////////////////// /// typedef as CELFileTranscriptomeEntryType /// @brief Structure of CEL file entries for bcel format /////////////////////////////////////////////////////////////////////////////// typedef struct _CELFileTranscriptomeEntryType { /// Intensity unsigned short Intensity /* \cond */ STRUCT_ALIGNMENT /*! \endcond */ ; /// Standard deviation intensity unsigned short Stdv /* \cond */ STRUCT_ALIGNMENT /*! \endcond */ ; /// Number of pixels // The STRUCT_ALIGNMENT isnt needed for gcc. // and if used it generates a warning. unsigned char Pixels /* \cond */ /* STRUCT_ALIGNMENT */ /*! 
\endcond */ ; } CELFileTranscriptomeEntryType; #ifdef _MSC_VER #pragma pack(pop) #endif #ifdef __APPLE__ #pragma options align=reset #endif /////////////////////////////////////////////////////////////////////////////// /// affxcel::CCELFileHeaderData /// @brief CEL file header information /// /// @remarks Object instance is included in affxcel::CCELFileData /////////////////////////////////////////////////////////////////////////////// class CCELFileHeaderData { private: /// Magic number for identifying XDA format int m_Magic; /// CEL file format version number int m_Version; /// Number of columns in array int m_nCols; /// Number of rows in array int m_nRows; /// Number of cells in array int m_nCells; /// Header information concatenated in a string std::string m_Header; /// Algorithm name std::string m_Alg; /// Algorithm parameters std::string m_Params; /// Chip type of array std::string m_ChipType; /// DAT header string std::string m_DatHeader; /// Cell margin int m_Margin; /// Number of outliers uint32_t m_nOutliers; /// Number of masked cells uint32_t m_nMasked; /// Grid coordinates of array GridCoordinatesType m_CellGrid; /// STL map of algorithm parameters std::map m_Parameters; /// STL map of algorithm parameter keys std::map m_ParameterIndices; public: /*! Constructor */ CCELFileHeaderData(); /*! Destructor */ ~CCELFileHeaderData(); /*! 
Clears the members */ void Clear(); /////////////////////////////////////////////////////////////////////////////// /// inline public SetMagic /// @brief Set magic number for xda format /// @param i int Magic number /// @return void /////////////////////////////////////////////////////////////////////////////// void SetMagic(int i) { m_Magic = i; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetMagic /// @brief Retrieve magic number from xda format /// @return int Magic number /////////////////////////////////////////////////////////////////////////////// int GetMagic() { return m_Magic; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetVersion /// @brief Set file format version number /// @param i int Version number /// @return void /////////////////////////////////////////////////////////////////////////////// void SetVersion(int i) { m_Version = i; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetVersion /// @brief Retrieve fiel format version number /// @return int Version number /////////////////////////////////////////////////////////////////////////////// int GetVersion() { return m_Version; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetCols /// @brief Set number of columns in array /// @param i int Number of columns /// @return void /////////////////////////////////////////////////////////////////////////////// void SetCols(int i) { m_nCols = i; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetCols /// @brief Retrieve number of columns in array /// @return int Number of columns /////////////////////////////////////////////////////////////////////////////// int GetCols() { return m_nCols; } /////////////////////////////////////////////////////////////////////////////// /// inline public 
SetRows /// @brief Set number of rows in array /// @param i int Number of rows /// @return void /////////////////////////////////////////////////////////////////////////////// void SetRows(int i) { m_nRows = i; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetRows /// @brief Retrieve number of rows in array /// @return int Number of rows /////////////////////////////////////////////////////////////////////////////// int GetRows() { return m_nRows; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetCells /// @brief Set number of cells in array /// @param i int Number of cells /// @return void /////////////////////////////////////////////////////////////////////////////// void SetCells(int i) { m_nCells = i; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetCells /// @brief Retrieve number of cells in array /// @return int Number of cells /////////////////////////////////////////////////////////////////////////////// int GetCells() { return m_nCells; } /////////////////////////////////////////////////////////////////////////////// /// inline public AppendHeader /// @brief Append data to header string /// @param p const char * Header information /// @return void /////////////////////////////////////////////////////////////////////////////// void AppendHeader(const char* p) { m_Header.append(p); } /////////////////////////////////////////////////////////////////////////////// /// inline public SetHeader /// @brief Set header information of CEL File in a single string /// @param p const char * Header string /// @return void /////////////////////////////////////////////////////////////////////////////// void SetHeader(const char* p) { m_Header = p; } /////////////////////////////////////////////////////////////////////////////// /// public GetHeader /// @brief Concatenate header data into a string and return it /// 
@return std::string Header string /////////////////////////////////////////////////////////////////////////////// std::string GetHeader(); /////////////////////////////////////////////////////////////////////////////// /// inline public SetAlg /// @brief Set algorithm name /// @param p const char * Algorithm name /// @return void /////////////////////////////////////////////////////////////////////////////// void SetAlg(const char* p) { m_Alg = p; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetAlg /// @brief Retrieve algorithm name /// @return std::string Algorithm name /////////////////////////////////////////////////////////////////////////////// std::string GetAlg() { return m_Alg; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetParams /// @brief Set algorithm parameters /// @param p const char * Algorithm parameters /// @return void /////////////////////////////////////////////////////////////////////////////// void SetParams(const char* p) { m_Params = p; ParseAlgorithmParameters(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetParams /// @brief Retrieve algorithm parameters /// @return std::string Algorithm parameters /////////////////////////////////////////////////////////////////////////////// std::string GetParams() { return m_Params; } /////////////////////////////////////////////////////////////////////////////// /// public ParseChipType /// @brief Parse chip type of array from header string /// @return void /////////////////////////////////////////////////////////////////////////////// void ParseChipType(); /////////////////////////////////////////////////////////////////////////////// /// inline public SetChipType /// @brief Set chip type of array /// @param p const char * Chip type /// @return void /////////////////////////////////////////////////////////////////////////////// void 
SetChipType(const char* p) { m_ChipType = p; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetChipType /// @brief Retrieve chip type of array /// @return std::string Chip type /////////////////////////////////////////////////////////////////////////////// std::string GetChipType() { return m_ChipType; } /////////////////////////////////////////////////////////////////////////////// /// public ParseDatHeader /// @brief Parse DAT header from header string /// @return void /////////////////////////////////////////////////////////////////////////////// void ParseDatHeader(); /////////////////////////////////////////////////////////////////////////////// /// public overloaded SetDatHeader /// @brief Set default DAT header /// @return void /////////////////////////////////////////////////////////////////////////////// void SetDatHeader(); /////////////////////////////////////////////////////////////////////////////// /// inline public overloaded SetDatHeader /// @brief Set DAT header with input string /// @param p const char * DAT header /// @return void /////////////////////////////////////////////////////////////////////////////// void SetDatHeader(const char* p) { m_DatHeader = p; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetDatHeader /// @brief Retrieve DAT header /// @return std::string DAT header /////////////////////////////////////////////////////////////////////////////// std::string GetDatHeader() { return m_DatHeader; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetMargin /// @brief Set cell margin /// @param i int Cell margin /// @return void /////////////////////////////////////////////////////////////////////////////// void SetMargin(int i); /////////////////////////////////////////////////////////////////////////////// /// inline public GetMargin /// @brief Retrieve cell margin /// @return 
int Cell margin /////////////////////////////////////////////////////////////////////////////// int GetMargin() { return m_Margin; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetOutliers /// @brief Set number of outliers /// @param l unsigned int32_t Number of outliers /// @return void /////////////////////////////////////////////////////////////////////////////// void SetOutliers(uint32_t l) { m_nOutliers = l; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetOutliers /// @brief Retrieve number of outliers /// @return unsigned int32_t Number of outliers /////////////////////////////////////////////////////////////////////////////// uint32_t GetOutliers() { return m_nOutliers; } /////////////////////////////////////////////////////////////////////////////// /// inline public IncrementOutliers /// @brief Increment number of outliers by 1 /// @return void /////////////////////////////////////////////////////////////////////////////// void IncrementOutliers() { m_nOutliers++; } /////////////////////////////////////////////////////////////////////////////// /// inline public DecrementOutliers /// @brief Decrement number of outliers by 1 /// @return void /////////////////////////////////////////////////////////////////////////////// void DecrementOutliers() { m_nOutliers--; } /////////////////////////////////////////////////////////////////////////////// /// inline public SetMasked /// @brief Set number of masked cells /// @param l unsigned int32_t Number of masked cells /// @return void /////////////////////////////////////////////////////////////////////////////// void SetMasked(uint32_t l) { m_nMasked = l; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetMasked /// @brief Retrieve number of masked cells /// @return uint32_t Number of masked cells 
/////////////////////////////////////////////////////////////////////////////// uint32_t GetMasked() { return m_nMasked; } /////////////////////////////////////////////////////////////////////////////// /// inline public IncrementMasked /// @brief Increment number of masked cells by 1 /// @return void /////////////////////////////////////////////////////////////////////////////// void IncrementMasked() { m_nMasked++; } /////////////////////////////////////////////////////////////////////////////// /// inline public DecrementMasked /// @brief Decrement number of masked cells by 1 /// @return void /////////////////////////////////////////////////////////////////////////////// void DecrementMasked() { m_nMasked--; } /*! Parses the algorithm parameters from the parameter string */ void ParseAlgorithmParameters(); /*! Adds a parameter to the list * @param tag The parameter name. * @param value The parameter value. */ void AddAlgorithmParameter(std::string& tag, std::string& value); /*! Adds a parameter to the list * @param tag The parameter name. * @param value The parameter value. */ void AddAlgorithmParameter(const char *tag, const char *value); /*! Updates the parameter value. * @param tag The parameter name. * @param value The parameter value. */ void SetAlgorithmParameter(const char *tag, const char *value); /*! Set or Update the parameter value. * @param tag The parameter name. * @param value The parameter value. */ void SetAddAlgorithmParameter(std::string& tag, std::string& value); /*! Set or Update the parameter value. * @param tag The parameter name. * @param value The parameter value. */ void SetAddAlgorithmParameter(const char *tag, const char *value); /*! Retrieves a parameter value. * @param tag The parameter name. * @return The parameter value. */ std::string GetAlgorithmParameter(const char *tag); /*! Retrieves a parameter name. * @param index The index to the parameter array. * @return The parameter name. 
*/ std::string GetAlgorithmParameterTag(int index); /*! Retrieves the number of parameter name/value pairs. * @return The number of parameter name/value pairs. */ int GetNumberAlgorithmParameters() { return (int) m_Parameters.size(); } /*! Retrieves a parameters as a formatted string. * @return The parameter name/value pairs. */ std::string GetAlgorithmParameters(); /*! Sets the grid coordinates. * @param grid The new grid coordinates. */ void SetGridCorners(GridCoordinatesType grid); /*! Parses the grid coordinates from the header. */ void ParseCorners(); /////////////////////////////////////////////////////////////////////////////// /// inline public GetGridCorners /// @brief Retrieve grid coordinates /// @return GridCoordinatesType Grid coordinates /////////////////////////////////////////////////////////////////////////////// GridCoordinatesType GetGridCorners() { return m_CellGrid; } }; ////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /// affxcel::CCELFileData /// /// @brief CEL file object /// /// Include read and write support of the following file formats: /// - text (version 3) /// - xda binary (version 4) /// - transcriptome binary (internal use only) /// - compact binary (not supported by software other than Bruce) /// /// All file formats include the same header information. 
The intensity data /// for each cell are stored as follows: /// - text format /// - mean (data type: float - 4 bytes) /// - stdev (data type: float - 4 bytes) /// - pixels (data type: short - 2 bytes) /// - xda binary format /// - mean (data type: float - 4 bytes) /// - stdev (data type: float - 4 bytes) /// - pixels (data type: short - 2 bytes) /// - trancriptome binary format /// - mean (data type: unsigned short - 2 bytes) /// - stdev (data type: unsigned short - 2 bytes) /// - pixels (data type: unsigned char - 1 byte) /// - compact binary format /// - mean (data type: unsigned short - 2 bytes) /////////////////////////////////////////////////////////////////////////////// class CCELFileData { public: /// CEL file formats enum { UNKNOWN = 0, TEXT_CEL = 1, XDA_BCEL = 2, TRANSCRIPTOME_BCEL = 3, COMPACT_BCEL = 4 }; /// CEL file reading states enum { CEL_ALL=1, CEL_DATA=2, CEL_OUTLIER=4, CEL_MASK=8 }; public: /*! Constructor */ CCELFileData(); /*! Destructor */ ~CCELFileData(); protected: /// CEL file format int m_FileFormat; /// Error string std::string m_strError; /// CEL file name without path std::string m_FileName; /// CEL file header data object CCELFileHeaderData m_HeaderData; /// Pointer to intensity entries for each cell (used for text and xda formats) CELFileEntryType *m_pEntries; /// Pointer to intensity entries for each cell (used for transcriptome bcel format) CELFileTranscriptomeEntryType *m_pTransciptomeEntries; /// Pointer to intensity entries for each cell (used for compact cel format) unsigned short *m_pMeanIntensities; /// STL map for masked cell coordinates std::map m_MaskedCells; /// STL map for outlier coordinates std::map m_Outliers; /// CEL file reading state int m_nReadState; /// Flag to determine if masked cell data should be read bool m_bReadMaskedCells; /// Flag to determine if outlier data should be read bool m_bReadOutliers; #ifdef _MSC_VER /// File handle used by CreateFileMapping in _MSC_VER HANDLE m_hFile; /// File map handle used 
by MapViewOfFile in _MSC_VER HANDLE m_hFileMap; #else /// Pointer to file object FILE* m_File; /// Memory mapping size used by mmap (POSIX) size_t m_MapLen; #endif /// Pointer to memory mapping data char *m_lpData; /// Pointer to memory mapping file view void *m_lpFileMap; /*! Opens the file. * @param bReadHeaderOnly Flag indicating if the header is only to be read. * @return True if successful. */ bool Open(bool bReadHeaderOnly = false); /*! Reads the text version of the CEL file. * @param bReadHeaderOnly Flag indicating if the header is only to be read. * @return True if successful. */ bool ReadTextCel(bool bReadHeaderOnly = false); /*! Reads the XDA version of the CEL file. * @param bReadHeaderOnly Flag indicating if the header is only to be read. * @return True if successful. */ bool ReadXDABCel(bool bReadHeaderOnly = false); /*! Reads the transcriptome groups custom CEL file. * @param bReadHeaderOnly Flag indicating if the header is only to be read. * @return True if successful. */ bool ReadTranscriptomeBCel(bool bReadHeaderOnly = false); /*! Reads the compact binary CEL file. * @param bReadHeaderOnly Flag indicating if the header is only to be read. * @return True if successful. */ bool ReadCompactBCel(bool bReadHeaderOnly = false); /*! Determines the type of CEL file. */ void DetermineFileFormat(); /*! Rounds a value to the nearest interger. * @param f The value to round. * @return The rounded value. */ unsigned short RoundNumber(float f); public: /*! Sets the error string. * @param str The error. 
*/ void SetError(const std::string& str); void SetError(const char* str); /////////////////////////////////////////////////////////////////////////////// /// inline public GetError /// @brief Retrieve error string /// @return std::string Error string /////////////////////////////////////////////////////////////////////////////// std::string GetError() { return m_strError; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetThisPtr /// @brief Retrieve the pointer of the current object instance /// @return affxcel::CCELFileData * Pointer to current object instance /////////////////////////////////////////////////////////////////////////////// affxcel::CCELFileData *GetThisPtr() { return this; } /////////////////////////////////////////////////////////////////////////////// /// inline public constant SetFileName /// @brief Set CEL file name /// @param str const char* File name /// @return void /////////////////////////////////////////////////////////////////////////////// void SetFileName(const char *str) { m_FileName = str; } /////////////////////////////////////////////////////////////////////////////// /// inline public constant GetFileName /// @brief Retrieve CEL file name /// @return std::string File name /////////////////////////////////////////////////////////////////////////////// std::string GetFileName() const { return m_FileName; } // Accessors for header information. 
std::string GetHeaderKey(const char* key); /////////////////////////////////////////////////////////////////////////////// /// inline public GetVersion /// @brief Call affxcel::CCELFileHeaderData::GetVersion() to retrieve file format version number /// @return int File format version number /// @see affxcel::CCELFileHeaderData::GetVersion /////////////////////////////////////////////////////////////////////////////// int GetVersion() { return m_HeaderData.GetVersion(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetCols /// @brief Call affxcel::CCELFileHeaderData::GetCols() to retrieve number of columns in array /// @return int Number of columns in array /// @see affxcel::CCELFileHeaderData::GetCols /////////////////////////////////////////////////////////////////////////////// int GetCols() { return m_HeaderData.GetCols(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetRows /// @brief Call affxcel::CCELFileHeaderData::GetRows() to retrieve number of rows in array /// @return int Number of rows in array /// @see affxcel::CCELFileHeaderData::GetRows /////////////////////////////////////////////////////////////////////////////// int GetRows() { return m_HeaderData.GetRows(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetNumCells /// @brief Call affxcel::CCELFileHeaderData::GetNumCells() to retrieve number of cells in array /// @return int Number of cells in array /// @see affxcel::CCELFileHeaderData::GetNumCells /////////////////////////////////////////////////////////////////////////////// int GetNumCells() { return m_HeaderData.GetCells(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetHeaderString /// @brief Call affxcel::CCELFileHeaderData::GetHeader() to retrieve header in a single string /// @return std::string Header string /// @see 
affxcel::CCELFileHeaderData::GetHeader /////////////////////////////////////////////////////////////////////////////// std::string GetHeaderString() { return m_HeaderData.GetHeader(); } /////////////////////////////////////////////////////////////////////////////// /// public GetHeader /// @brief Gets the header object. /// @return CCELFileHeaderData The header object. /////////////////////////////////////////////////////////////////////////////// CCELFileHeaderData &GetHeader() { return m_HeaderData; } /////////////////////////////////////////////////////////////////////////////// /// inline public GetAlg /// @brief Call affxcel::CCELFileHeaderData::GetAlg() to retrieve algorithm name /// @return std::string Algorithm name /// @see affxcel::CCELFileHeaderData::GetAlg /////////////////////////////////////////////////////////////////////////////// std::string GetAlg() { return m_HeaderData.GetAlg(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetParams /// @brief Call affxcel::CCELFileHeaderData::GetParams() to retrieve algorithm parameters /// @return std::string Algorithm parameters /// @see affxcel::CCELFileHeaderData::GetParams /////////////////////////////////////////////////////////////////////////////// std::string GetParams() { return m_HeaderData.GetAlgorithmParameters(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetAlgorithmParameter /// @brief Call affxcel::CCELFileHeaderData::GetAlgorithmParameter() to retrieve algorithm parameter of specified tag /// @param tag const char* Algorithm parameter tag /// @return std::string Algorithm parameter value /// @see affxcel::CCELFileHeaderData::GetAlgorithmParameter /////////////////////////////////////////////////////////////////////////////// std::string GetAlgorithmParameter(const char *tag) { return m_HeaderData.GetAlgorithmParameter(tag); } /*! 
Retrieves the algorithm parameter name (tag) for a given index position. * @param index The zero based index to the parameter array (0 to the number of alg parameters - 1). * @return The parameter name (tag). */ std::string GetAlgorithmParameterTag(int index) { return m_HeaderData.GetAlgorithmParameterTag(index); } /*! Retrieves the number of algorithm parameters. * @return The number of algorithm parameters. */ int GetNumberAlgorithmParameters() { return m_HeaderData.GetNumberAlgorithmParameters(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetAlgorithmParameters /// @brief Call affxcel::CCELFileHeaderData::GetAlgorithmParameters() to retrieve algorithm parameters /// @return std::string Algorithm parameters /// @see affxcel::CCELFileHeaderData::GetAlgorithmParameters /////////////////////////////////////////////////////////////////////////////// std::string GetAlgorithmParameters() { return m_HeaderData.GetAlgorithmParameters(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetDatHeader /// @brief Call affxcel::CCELFileHeaderData::GetDatHeader() to retrieve DAT header /// @return std::string DAT header /// @see affxcel::CCELFileHeaderData::GetDatHeader /////////////////////////////////////////////////////////////////////////////// std::string GetDatHeader() { return m_HeaderData.GetDatHeader(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetChipType /// @brief Call affxcel::CCELFileHeaderData::GetChipType() to retrieve chip type /// @return std::string Chip type /// @see affxcel::CCELFileHeaderData::GetChipType /////////////////////////////////////////////////////////////////////////////// std::string GetChipType() { return m_HeaderData.GetChipType(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetCellMargin /// @brief Call 
affxcel::CCELFileHeaderData::GetCellMargin() to retrieve cell margin /// @return int Cell margin /// @see affxcel::CCELFileHeaderData::GetCellMargin /////////////////////////////////////////////////////////////////////////////// int GetCellMargin() { return m_HeaderData.GetMargin(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetNumOutliers /// @brief Call affxcel::CCELFileHeaderData::GetNumOutliers() to retrieve number of outliers /// @return uint32_t Number of outliers /// @see affxcel::CCELFileHeaderData::GetNumOutliers /////////////////////////////////////////////////////////////////////////////// uint32_t GetNumOutliers() { return m_HeaderData.GetOutliers(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetNumMasked /// @brief Call affxcel::CCELFileHeaderData::GetNumMasked() to retrieve number of masked cells /// @return uint32_t Number of masked cells /// @see affxcel::CCELFileHeaderData::GetNumMasked /////////////////////////////////////////////////////////////////////////////// uint32_t GetNumMasked() { return m_HeaderData.GetMasked(); } /////////////////////////////////////////////////////////////////////////////// /// inline public GetGridCorners /// @brief Call affxcel::CCELFileHeaderData::GetGridCorners() to retrieve grid coordinates /// @return GridCoordinatesType Grid coordinates /// @see affxcel::CCELFileHeaderData::GetGridCorners /////////////////////////////////////////////////////////////////////////////// GridCoordinatesType GetGridCorners() { return m_HeaderData.GetGridCorners(); } // Index/position conversions /////////////////////////////////////////////////////////////////////////////// /// inline public IndexToX /// @brief Get x coordinates from index /// @return int X coordinates /////////////////////////////////////////////////////////////////////////////// int IndexToX(int index) { return index % m_HeaderData.GetCols(); } 
/////////////////////////////////////////////////////////////////////////////// /// inline public IndexToY /// @brief Get y coordinates from index /// @return int Y coordinates /////////////////////////////////////////////////////////////////////////////// int IndexToY(int index) { return index / m_HeaderData.GetCols(); } /////////////////////////////////////////////////////////////////////////////// /// inline public XYToIndex /// @brief Convert x, y coordinates to index /// @return int Index /////////////////////////////////////////////////////////////////////////////// int XYToIndex(int x, int y) { return XYToIndex(x,y, m_HeaderData.GetRows(), m_HeaderData.GetCols()); } /*! Maps X/Y coordinates to CEL file index. * @param x The x coordinate. * @param y The y coordinate. * @param r The number of rows. * @param c The number of columns. * @return The index to the intensity arrays. */ static int XYToIndex(int x, int y, int r, int c) { return ((y*c) + x); } /*! Retrieves a CEL file entry. * @param index The index to the CEL file entries. * @param entry The CEL file entry. */ void GetEntry(int index, CELFileEntryType &entry); /*! Retrieves a CEL file entry. * @param x The X coordinate. * @param y The Y coordinate. * @param entry The CEL file entry. */ void GetEntry(int x, int y, CELFileEntryType &entry); /*! Retrieves a CEL file entry. * @param index The index to the CEL file entries. * @param entry The CEL file entry. */ void GetTranscriptomeEntry(int index, CELFileTranscriptomeEntryType &entry); /*! Retrieves a CEL file entry. * @param x The X coordinate. * @param y The Y coordinate. * @param entry The CEL file entry. */ void GetTranscriptomeEntry(int x, int y, CELFileTranscriptomeEntryType &entry); /*! Retrieves a CEL file intensity. * @param index The index to the CEL file entries. * @return The CEL file intensity. */ float GetIntensity(int index); /// @brief Get a vector of intensities with one call. 
/// @param index starting index /// @param intensities vector to fill. /// @return non-zero on error int GetIntensities(int index,std::vector& intensities); /*! Retrieves a CEL file intensity. * @param x The X coordinate. * @param y The Y coordinate. * @return The CEL file intensity. */ float GetIntensity(int x, int y); /*! Retrieves a CEL file stdv value. * @param index The index to the CEL file entries. * @return The CEL file stdv value. */ float GetStdv(int index); /*! Retrieves a CEL file stdv value. * @param x The X coordinate. * @param y The Y coordinate. * @return The CEL file stdv value. */ float GetStdv(int x, int y); /*! Retrieves a CEL file pixel count. * @param index The index to the CEL file entries. * @return The CEL file pixel count. */ short GetPixels(int index); /*! Retrieves a CEL file pixel count. * @param x The X coordinate. * @param y The Y coordinate. * @return The CEL file pixel count. */ short GetPixels(int x, int y); /*! Retrieves a CEL file mask flag. * @param x The X coordinate. * @param y The Y coordinate. * @return True if the feature is masked. */ bool IsMasked(int x, int y); /*! Retrieves a CEL file mask flag. * @param index The index to the CEL file entries. * @return True if the feature is masked. */ bool IsMasked(int index); /*! Retrieves a CEL file outlier flag. * @param x The X coordinate. * @param y The Y coordinate. * @return True if the feature is an outlier. */ bool IsOutlier(int x, int y); /*! Retrieves a CEL file outlier flag. * @param index The index to the CEL file entries. * @return True if the feature is an outlier. */ bool IsOutlier(int index); // For reading a file. /*! Closes the file */ void Close(); /*! Returns the file size. * @return The file size. */ uint32_t GetFileSize(); /*! Checks if the file exists. * @return True if the file exists. */ bool Exists(); std::string ResolveName(); /*! Reads the header of the CEL file. * @return True if successful. */ bool ReadHeader(); /*! Reads the CEL file. 
* @param bIncludeMaskAndOutliers Flag to indicate if the mask and outlier sections should also be read. * @return True if successful. */ bool Read(bool bIncludeMaskAndOutliers=true); /*! Checks if the file type is XDA. * @return True if XDA type. */ bool IsXDACompatibleFile(); /*! Checks if the file type is version 3. * @return True if version 3 type. */ bool IsVersion3CompatibleFile(); /*! Checks if the file type is transcriptome. * @return True if transcriptome type. */ bool IsTranscriptomeBcelFile(); /*! Checks if the file type is compact. * @return True if compact type. */ bool IsCompactCelFile(); /*! Checks if the file type is previous unspported version of compact. * @return True if compact type. */ bool IsUnsupportedCompactCelFile(); /*! Determine CEL file format and call appropriate function to read file using the specified file name. * @param filename The name of the file. * @param nState Reading state * @return bool true if success, false if fail * * \a nState can be one or combination of the following values: * CEL_ALL Read all information in file (default) * CEL_DATA Read header and intensities only * CEL_OUTLIER Read header, intensities and outliers * CEL_MASK Read header, intensities and masked cells */ bool ReadEx(const char *filename=0, int nState=CEL_ALL); /*! Returns the reading state. * @return The reading state. */ int GetReadState() { return m_nReadState; }; // For writing a new CEL file /*! Clears the members. */ void Clear(); /*! Get the file format type. * @return The file format type. */ int GetFileFormat() { return m_FileFormat; } /*! Sets the file format type. * @param i The file format type. */ void SetFileFormat(int i); /*! Sets the name of the algorithm used to create the CEL file. * @param str The algorithm name. */ void SetAlgorithmName(const char *str); /*! Adds a parameter to the parameter list. * @param tag The parameter name. * @param value The parameter value. 
*/ void SetAlgorithmParameter(const char *tag, const char *value); /*! Adds a parameter to the parameter list. * @param tag The parameter name. * @param value The parameter value. */ void AddAlgorithmParameter(const char *tag, const char *value); /*! Adds a parameter to the parameter list. * @param tag The parameter name. * @param value The parameter value. */ void SetAddAlgorithmParameter(const char *tag, const char *value); /*! Sets dimentions (rows/cols) of the CEL file. * @param rows The number of rows. * @param cols The number of columns. */ void SetDimensions(int rows, int cols); /*! Sets probe array (chip) type. * @param str The probe array type. */ void SetChipType(const char *str); /*! Sets the margin used in creating the CEL file. * @param margin The margin used in creating the CEL file. */ void SetMargin(int margin); /*! Sets the grid coordinates. * @param grid The grid coordinates. */ void SetGridCorners(GridCoordinatesType grid); /*! Sets a CEL file entry. * @param x The X coordinate. * @param y The Y coordinate. * @param pEntry The CEL file entry. */ void SetCellEntry(int x, int y, CELFileEntryType *pEntry); /*! Sets a CEL file entry. * @param index The index to the CEL file entries. * @param pEntry The CEL file entry. */ void SetCellEntry(int index, CELFileEntryType *pEntry); /*! Allocates entries for a text or XDA file. */ void AllocateEntries(); /*! Sets a CEL file entry. * @param x The X coordinate. * @param y The Y coordinate. * @param pEntry The CEL file entry. */ void SetTranscriptomeCellEntry(int x, int y, CELFileTranscriptomeEntryType *pEntry); /*! Sets a CEL file entry. * @param index The index to the CEL file entries. * @param pEntry The CEL file entry. */ void SetTranscriptomeCellEntry(int index, CELFileTranscriptomeEntryType *pEntry); /*! Allocates memory for the transcriptome file type entries. */ void AllocateTranscriptomeEntries(); /*! Sets the intensity value. * @param index The index to the CEL file. 
* @param intensity The intensity value. */ void SetIntensity(int index, float intensity); /*! Sets the intensity value. * @param x The X coordinate. * @param y The Y coordinate. * @param intensity The intensity value. */ void SetIntensity(int x, int y, float intensity); /*! Sets the stdev value. * @param index The index to the CEL file. * @param stdev The stdev value. */ void SetStdv(int index, float stdev); /*! Sets the stdev value. * @param x The X coordinate. * @param y The Y coordinate. * @param stdev The stdev value. */ void SetStdv(int x, int y, float stdev); /*! Sets the pixel count. * @param index The index to the CEL file. * @param pixels The pixel count. */ void SetPixels(int index, short pixels); /*! Sets the pixel count. * @param x The X coordinate. * @param y The Y coordinate. * @param pixels The pixel count. */ void SetPixels(int x, int y, short pixels); /*! Sets the mask flag. * @param index The index to the CEL file. * @param mask The mask flag. */ void SetMask(int index, bool mask); /*! Sets the mask flag. * @param x The X coordinate. * @param y The Y coordinate. * @param masked The mask flag. */ void SetMask(int x, int y, bool masked); /*! Sets the outlier flag. * @param index The index to the CEL file. * @param outlier The outlier flag. */ void SetOutlier(int index, bool outlier); /*! Sets the outlier flag. * @param x The X coordinate. * @param y The Y coordinate. * @param outlier The outlier flag. */ void SetOutlier(int x, int y, bool outlier); /*! Unmap the file. */ void Munmap(); /*! Store data in memory if CEL file is memory mapped. 
*/ void EnsureNotMmapped(); }; ////////////////////////////////////////////////////////////////////// } // namespace //////////////////////////////////////////////////////////////////// #endif // _CELFILEDATA_H_ affxparser/src/fusion/file/CELFileWriter.cpp0000644000175200017520000004764414516003651022072 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/CELFileWriter.h" // #include "file/FileIO.h" #include "file/FileWriter.h" // #include #include #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #ifdef HAVE_SNPRINTF // If not using visual c++'s _snprintf include snprintf. extern "C" { #include "snprintf.h" } #else // otherwise use _snprintf where normally use snprintf. 
#define snprintf _snprintf #endif // HAVE_SNPRINTF #endif // _MSC_VER using namespace affxcel; ////////////////////////////////////////////////////////////////////// /// Delimiter character in DAT header #define DELIMCHAR 0x14 /// Minimum number of characters in cell data row in text format #define MIN_CELLSTR 2 /// Xda format identifier #define CELL_FILE_MAGIC_NUMBER 0x00000040 // 64 /// Version number for xda format #define CELL_FILE_VERSION_NUMBER 0x00000004 // 4 /// Transcriptome bcel format identifier #define BCEL_HEADER_BYTES "BCEL\r\n\032\n" /// Size of transcriptome bcel format identifier #define BCEL_HEADER_LEN 8 /// Header information size in transcriptome bcel format #define STRUCT_SIZE_BCEL_HEADER_INFO 66 /// Feature data size in transcriptome bcel format #define STRUCT_SIZE_FEATURE_DATA 5 /// Masked cells and outlier data size in transcriptome bcel format #define STRUCT_SIZE_XY_PAIR 8 /// Size of anonymous 4-byte identifier in transcriptome bcel format #define BCEL_CHUNK_LEN_SIZE 4 /// Size of section name identifier in transcriptome bcel format #define BCEL_CHUNK_NAME_SIZE 4 /// Size of section separator in transcriptome bcel format #define BCEL_CHUNK_CRC_SIZE 4 /// Header section identifier in transcriptome bcel format #define BCEL_CHUNK_HEAD "HEAD" /// DAT header section identifier in transcriptome bcel format #define BCEL_CHUNK_DTHD "DTHD" /// Algorithm section identifier in transcriptome bcel format #define BCEL_CHUNK_ALGM "ALGM" /// Algorithm parameter section identifier in transcriptome bcel format #define BCEL_CHUNK_ALPR "ALPR" /// Intensity section identifier in transcriptome bcel format #define BCEL_CHUNK_INTY "INTY" /// Masked cell section identifier in transcriptome bcel format #define BCEL_CHUNK_MASK "MASK" /// Outlier section identifier in transcriptome bcel format #define BCEL_CHUNK_OUTL "OUTL" /// End of file identifier in transcriptome bcel format #define BCEL_CHUNK_TAIL "TAIL" /// Compact cel format identifier #define CCEL_HEADER_BYTES
"CCEL\r\n\128\n" /// Unspported version of compact cel format identifier #define OLD_CCEL_HEADER_BYTES "CCEL\r\n\064\n" /// Size of compact cel format identifier #define CCEL_HEADER_LEN 8 #ifdef _MSC_VER /// Line separator for _MSC_VER #define LINE_SEPARATOR "\n" #else /// Line separator for unix/linux #define LINE_SEPARATOR "\r\n" #endif #ifndef PAGE_SIZE /// Page size used for memory mapping in non Windows environment #define PAGE_SIZE (getpagesize()) #endif #ifndef PAGE_MASK /// Page mask used for memory mapping in non Windows environment #define PAGE_MASK ~(PAGE_SIZE-1) #endif #ifndef PAGE_TRUNC /// Page truncation pointer used for memory mapping in non Windows environment #define PAGE_TRUNC(ptr) (ptr&(PAGE_MASK)) #endif /////////////////////////////////////////////////////////////////////////////// /// public constructor CCELFileWriter /// \brief Default constructor /// /// @return void /////////////////////////////////////////////////////////////////////////////// CCELFileWriter::CCELFileWriter() : CCELFileData() { } /////////////////////////////////////////////////////////////////////////////// /// public destructor ~CCELFileWriter /// \brief Destructor /// /// @return void /////////////////////////////////////////////////////////////////////////////// CCELFileWriter::~CCELFileWriter() { } /////////////////////////////////////////////////////////////////////////////// /// public WriteTextCel /// \brief ::iterator pos = m_MaskedCells.begin(); pos != m_MaskedCells.end(); pos++) { length = snprintf(szBuffer,sizeof(szBuffer), "%d\t%d%s", IndexToX(pos->first), IndexToY(pos->first), LINE_SEPARATOR); newCelFile.write(szBuffer, length); } length = snprintf(szBuffer,sizeof(szBuffer), "%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); // Write the outlier data length = snprintf(szBuffer,sizeof(szBuffer), "[OUTLIERS]%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); length = snprintf(szBuffer,sizeof(szBuffer), "NumberCells=%d%s", 
m_HeaderData.GetOutliers(), LINE_SEPARATOR); newCelFile.write(szBuffer, length); length = snprintf(szBuffer,sizeof(szBuffer), "CellHeader=X\tY%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); for(std::map::iterator pos = m_Outliers.begin(); pos != m_Outliers.end(); pos++) { length = snprintf(szBuffer,sizeof(szBuffer), "%d\t%d%s", IndexToX(pos->first), IndexToY(pos->first), LINE_SEPARATOR); newCelFile.write(szBuffer, length); } // Write the blank modified section. length = snprintf(szBuffer,sizeof(szBuffer), "%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); length = snprintf(szBuffer,sizeof(szBuffer), "[MODIFIED]%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); length = snprintf(szBuffer,sizeof(szBuffer), "NumberCells=0%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); length = snprintf(szBuffer,sizeof(szBuffer), "CellHeader=X\tY\tORIGMEAN%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); length = snprintf(szBuffer,sizeof(szBuffer), "%s", LINE_SEPARATOR); newCelFile.write(szBuffer, length); // Close the file and check the status. newCelFile.close(); return !newCelFile.fail(); } /////////////////////////////////////////////////////////////////////////////// /// public WriteXDABCel /// \brief Write CEL file in xda binary format /// /// @return bool true if success; false otherwise /// /// \remark All multi-byte numeric values are stored in little endian byte order /////////////////////////////////////////////////////////////////////////////// bool CCELFileWriter::WriteXDABCel() { if (m_FileName.length() == 0) { SetError("No file name is set for file creation."); return false; } std::ofstream newCelFile; newCelFile.open(m_FileName.c_str(), std::ios::out | std::ios::binary); if (!newCelFile) { SetError("Unable to open the file."); return false; } m_HeaderData.SetDatHeader(); // Write the header. 
WriteUInt32_I(newCelFile, CELL_FILE_MAGIC_NUMBER); WriteUInt32_I(newCelFile, CELL_FILE_VERSION_NUMBER); // Write the dimensions of the array WriteUInt32_I(newCelFile, m_HeaderData.GetRows()); WriteUInt32_I(newCelFile, m_HeaderData.GetCols()); WriteUInt32_I(newCelFile, m_HeaderData.GetCells()); // Write the other members. WriteString_I(newCelFile, m_HeaderData.GetHeader()); WriteString_I(newCelFile, m_HeaderData.GetAlg()); WriteString_I(newCelFile, m_HeaderData.GetAlgorithmParameters()); WriteUInt32_I(newCelFile, m_HeaderData.GetMargin()); WriteUInt32_I(newCelFile, m_HeaderData.GetOutliers()); WriteUInt32_I(newCelFile, m_HeaderData.GetMasked()); WriteUInt32_I(newCelFile, 0); // Write the Mean data for (int iCell=0; iCell < m_HeaderData.GetCells(); iCell++) { int t_x=IndexToX(iCell); int t_y=IndexToY(iCell); float t_mean=GetIntensity(t_x,t_y); float t_stdv=GetStdv(t_x,t_y); uint16_t t_pixel=GetPixels(t_x,t_y); WriteFloatLowPrecision(newCelFile,t_mean); WriteFloatLowPrecision(newCelFile,t_stdv); WriteUInt16_I(newCelFile,t_pixel); } // Write the mask data for(std::map::iterator pos = m_MaskedCells.begin(); pos != m_MaskedCells.end(); pos++) { WriteUInt16_I(newCelFile, (uint16_t) IndexToX(pos->first)); WriteUInt16_I(newCelFile, (uint16_t) IndexToY(pos->first)); } // Write the outlier data for(std::map::iterator pos = m_Outliers.begin(); pos != m_Outliers.end(); pos++) { WriteUInt16_I(newCelFile, (uint16_t) IndexToX(pos->first)); WriteUInt16_I(newCelFile, (uint16_t) IndexToY(pos->first)); } // Close the file and check the status. 
newCelFile.close(); return !newCelFile.fail(); } /////////////////////////////////////////////////////////////////////////////// /// public WriteTranscriptomeBCel /// \brief Write CEL file in transcriptome binary format /// /// @return bool true if success; false otherwise /// /// \remark All multi-byte numeric values are stored in big endian byte order /////////////////////////////////////////////////////////////////////////////// bool CCELFileWriter::WriteTranscriptomeBCel() { if (m_FileName.length() == 0) { SetError("No file name is set for file creation."); return false; } std::ofstream newCelFile; newCelFile.open(m_FileName.c_str(), std::ios::out | std::ios::binary); if (!newCelFile) { SetError("Unable to open the file."); return false; } m_HeaderData.SetDatHeader(); // Write the header WriteFixedString(newCelFile, BCEL_HEADER_BYTES, BCEL_HEADER_LEN); WriteFloat_N(newCelFile, (float)m_HeaderData.GetVersion()); WriteUInt32_N(newCelFile, STRUCT_SIZE_BCEL_HEADER_INFO); WriteFixedString(newCelFile, BCEL_CHUNK_HEAD, BCEL_CHUNK_NAME_SIZE); WriteFloat_N(newCelFile, (float)m_HeaderData.GetVersion()); WriteUInt32_N(newCelFile, m_HeaderData.GetCols()); // columns WriteUInt32_N(newCelFile, m_HeaderData.GetRows()); // rows WriteUInt32_N(newCelFile, m_HeaderData.GetCols()); // totalx WriteUInt32_N(newCelFile, m_HeaderData.GetRows()); // totaly WriteUInt32_N(newCelFile, 0); // offsetx WriteUInt32_N(newCelFile, 0); // offsety GridCoordinatesType grid = m_HeaderData.GetGridCorners(); WriteUInt32_N(newCelFile, grid.upperleft.x); // GridCornerUL.x WriteUInt32_N(newCelFile, grid.upperleft.y); // GridCornerUL.y WriteUInt32_N(newCelFile, grid.upperright.x); // GridCornerUR.x WriteUInt32_N(newCelFile, grid.upperright.y); // GridCornerUR.y WriteUInt32_N(newCelFile, grid.lowerleft.x); // GridCornerLL.y WriteUInt32_N(newCelFile, grid.lowerleft.y); // GridCornerLL.x WriteUInt32_N(newCelFile, grid.lowerright.x); // GridCornerLR.x WriteUInt32_N(newCelFile, grid.lowerright.y); // 
GridCornerLR.x WriteUInt16_N(newCelFile, 0); // Axis-invertX WriteUInt16_N(newCelFile, 0); // AxisInvertY WriteUInt16_N(newCelFile, 0); // swapXY WriteUInt32_N(newCelFile, 0); int length = (int) m_HeaderData.GetDatHeader().length(); // DatHeader WriteUInt32_N(newCelFile, length); WriteFixedString(newCelFile, BCEL_CHUNK_DTHD, BCEL_CHUNK_NAME_SIZE); if (length > 0) WriteFixedString(newCelFile, m_HeaderData.GetDatHeader(), length); else { SetError("Missing DatHeader."); Close(); return 0; } WriteUInt32_N(newCelFile, 0); length = (int) m_HeaderData.GetAlg().length(); // Algorithm WriteUInt32_N(newCelFile, length); WriteFixedString(newCelFile, BCEL_CHUNK_ALGM, BCEL_CHUNK_NAME_SIZE); if (length > 0) WriteFixedString(newCelFile, m_HeaderData.GetAlg(), length); else { SetError("Missing Algorithm."); Close(); return 0; } WriteUInt32_N(newCelFile, 0); length = (int) m_HeaderData.GetAlgorithmParameters().length(); // AlgorithmParameters WriteUInt32_N(newCelFile, length); WriteFixedString(newCelFile, BCEL_CHUNK_ALPR, BCEL_CHUNK_NAME_SIZE); if (length > 0) WriteFixedString(newCelFile, m_HeaderData.GetAlgorithmParameters(), length); else { SetError("Missing Algorithm Parameters."); Close(); return 0; } WriteUInt32_N(newCelFile, 0); // Write the Mean data WriteUInt32_N(newCelFile,(uint32_t)(m_HeaderData.GetCells() * STRUCT_SIZE_FEATURE_DATA)); WriteFixedString(newCelFile, BCEL_CHUNK_INTY, BCEL_CHUNK_NAME_SIZE); for (int iCell=0; iCell < m_HeaderData.GetCells(); iCell++) { int t_x=IndexToX(iCell); int t_y=IndexToY(iCell); float t_mean=GetIntensity(t_x,t_y); float t_stdv=GetStdv(t_x,t_y); uint16_t t_pixel=GetPixels(t_x,t_y); WriteUInt16_N(newCelFile,(uint16_t)RoundNumber(t_mean)); WriteUInt16_N(newCelFile,(uint16_t)RoundNumber(t_stdv)); WriteUInt8(newCelFile,(uint8_t)t_pixel); } WriteUInt32_N(newCelFile, 0); // crc // Write the mask data WriteUInt32_N(newCelFile,(uint32_t)(m_HeaderData.GetMasked() * STRUCT_SIZE_XY_PAIR)); WriteFixedString(newCelFile, BCEL_CHUNK_MASK, 
BCEL_CHUNK_NAME_SIZE); for(std::map::iterator pos = m_MaskedCells.begin(); pos != m_MaskedCells.end(); pos++) { WriteUInt32_N(newCelFile, (uint32_t) IndexToX(pos->first)); WriteUInt32_N(newCelFile, (uint32_t) IndexToY(pos->first)); } WriteUInt32_N(newCelFile, 0); // Write the outlier data WriteUInt32_N(newCelFile, (uint32_t) (m_HeaderData.GetOutliers() * STRUCT_SIZE_XY_PAIR)); WriteFixedString(newCelFile, BCEL_CHUNK_OUTL, BCEL_CHUNK_NAME_SIZE); for(std::map::iterator pos = m_Outliers.begin(); pos != m_Outliers.end(); pos++) { WriteUInt32_N(newCelFile, (uint32_t) IndexToX(pos->first)); WriteUInt32_N(newCelFile, (uint32_t) IndexToY(pos->first)); } WriteUInt32_N(newCelFile, 0); // Write tail WriteUInt32_N(newCelFile, 0); WriteFixedString(newCelFile, BCEL_CHUNK_TAIL, BCEL_CHUNK_NAME_SIZE); WriteUInt32_N(newCelFile, 0); // Close the file and check the status. newCelFile.close(); return !newCelFile.fail(); } /////////////////////////////////////////////////////////////////////////////// /// public WriteCompactBCel /// \brief Write CEL file in compact binary format /// /// @return bool true if success; false otherwise /// /// \remark All multi-byte numeric values are stored in little endian byte order /////////////////////////////////////////////////////////////////////////////// bool CCELFileWriter::WriteCompactBCel() { if (m_FileName.length() == 0) { SetError("No file name is set for file creation."); return false; } std::ofstream newCelFile; newCelFile.open(m_FileName.c_str(), std::ios::out | std::ios::binary); if (!newCelFile) { SetError("Unable to open the file."); return false; } m_HeaderData.SetDatHeader(); // Write the header WriteFixedString(newCelFile, CCEL_HEADER_BYTES, CCEL_HEADER_LEN); WriteUInt32_I(newCelFile, m_HeaderData.GetVersion()); // Write the dimensions of the array WriteUInt32_I(newCelFile, m_HeaderData.GetRows()); WriteUInt32_I(newCelFile, m_HeaderData.GetCols()); WriteUInt32_I(newCelFile, m_HeaderData.GetCells()); // Write the other members. 
WriteString_I(newCelFile, m_HeaderData.GetHeader().c_str()); WriteString_I(newCelFile, m_HeaderData.GetAlg().c_str()); WriteString_I(newCelFile, m_HeaderData.GetAlgorithmParameters().c_str()); WriteUInt32_I(newCelFile, m_HeaderData.GetMargin()); WriteUInt32_I(newCelFile, m_HeaderData.GetMasked()); WriteUInt32_I(newCelFile, 0); // Write the Mean data for (int iCell=0; iCell < m_HeaderData.GetCells(); iCell++) { int t_x=IndexToX(iCell); int t_y=IndexToY(iCell); float t_mean=GetIntensity(t_x,t_y); WriteUInt16_I(newCelFile,(uint16_t)RoundNumber(t_mean)); } // Write the mask data for(std::map::iterator pos = m_MaskedCells.begin(); pos != m_MaskedCells.end(); pos++) { WriteUInt16_I(newCelFile, (uint16_t) IndexToX(pos->first)); WriteUInt16_I(newCelFile, (uint16_t) IndexToY(pos->first)); } // Close the file and check the status. newCelFile.close(); return !newCelFile.fail(); } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/CELFileWriter.h0000644000175200017520000000637214516003651021530 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CELFILEWRITER_H_ #define _CELFILEWRITER_H_ /*! 
\file CELFileWriter.h This file provides CEL file writing capabilities.
 */

#include "file/CELFileData.h"
//

namespace affxcel
{

///////////////////////////////////////////////////////////////////////////////
///  affxcel::CCELFileWriter
///
///  \brief CEL file writer object
///
///  Include write support of the following file formats:
///  - text (version 3)
///  - xda binary (version 4)
///  - transcriptome binary (internal use only)
///  - compact binary (not supported by software other than Bruce)
///
///  All file formats include the same header information. The intensity data
///  for each cell are stored as follows:
///  - text format
///		- mean (data type: float - 4 bytes)
///		- stdev (data type: float - 4 bytes)
///		- pixels (data type: short - 2 bytes)
///  - xda binary format
///		- mean (data type: float - 4 bytes)
///		- stdev (data type: float - 4 bytes)
///		- pixels (data type: short - 2 bytes)
///  - transcriptome binary format
///		- mean (data type: unsigned short - 2 bytes)
///		- stdev (data type: unsigned short - 2 bytes)
///		- pixels (data type: unsigned char - 1 byte)
///  - compact binary format
///		- mean (data type: unsigned short - 2 bytes)
///
///  The writer derives from CCELFileData; the data to be written is whatever
///  the base object currently holds, and each Write* method creates the file
///  named by the object's file name.
///////////////////////////////////////////////////////////////////////////////
class CCELFileWriter : public CCELFileData
{
public:
	/*! Constructor */
	CCELFileWriter();

	/*! Destructor */
	~CCELFileWriter();

public:
	///////////////////////////////////////////////////////////////////////////////
	///  inline public  GetThisPtr
	///  \brief Retrieve the pointer of the current object instance
	///  @return affxcel::CCELFileWriter *	Pointer to current object instance
	///////////////////////////////////////////////////////////////////////////////
	affxcel::CCELFileWriter *GetThisPtr() { return this; }

	/*! Writes a version 3 ASCII text CEL file.
	 * @return True if successful
	 */
	bool WriteTextCel();

	/*! Writes an XDA binary CEL file.
	 * @return True if successful
	 */
	bool WriteXDABCel();

	/*! Writes a transcriptome binary CEL file.
	 *
	 * Mean and stdev are rounded and stored as 16-bit unsigned values and the
	 * pixel count as an 8-bit unsigned value, followed by the masked and
	 * outlier cell coordinates. The write is abandoned (error message set,
	 * false returned) when the DatHeader, algorithm name or algorithm
	 * parameters string is empty.
	 * @return True if successful
	 */
	bool WriteTranscriptomeBCel();

	/*! Writes a compact binary CEL file.
	 *
	 * Only the rounded mean (16-bit unsigned) is stored per cell, followed by
	 * the masked cell coordinates. Fails (error message set, false returned)
	 * when no file name is set or the file cannot be opened.
	 * @return True if successful
	 */
	bool WriteCompactBCel();
};

//////////////////////////////////////////////////////////////////////

} // namespace

////////////////////////////////////////////////////////////////////

#endif // _CELFILEWRITER_H_
affxparser/src/fusion/file/CHPFileBufferWriter.cpp0000644000175200017520000001300414516003651023212 0ustar00biocbuildbiocbuild
////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License 
// (version 2.1) as published by the Free Software Foundation.
// 
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// 
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
//
////////////////////////////////////////////////////////////////

/**
 * @file   CHPFileBufferWriter.cpp
 * @author David Le
 * @date   Mon May 15 12:09:42 2006
 *
 * @brief  Class for writing signals to a buffer before writing to CHP files.
*/ #include "file/CHPFileBufferWriter.h" // #include "file/CHPFileUpdater.h" // using namespace affxchpwriter; CCHPFileBufferWriter::CCHPFileBufferWriter() { m_BufferSize = 0; m_MaxBufferSize = MAX_BUFFER_SIZE; m_IsGenotype = false; // default is expression } CCHPFileBufferWriter::~CCHPFileBufferWriter() { FlushBuffer(); Cleanup(); } void CCHPFileBufferWriter::Cleanup() { if (m_IsGenotype) { for (int target=0; target *CHPFileNames, bool IsGenotype) { m_CHPFileNames = CHPFileNames; Cleanup(); for (int i=0; isize(); i++) { if (IsGenotype) { std::vector entryBuffer; m_TargetGenotypeEntryBuffers.push_back(entryBuffer); } else { std::vector entryBuffer; m_TargetExpressionEntryBuffers.push_back(entryBuffer); } m_TargetEntryRowIndexes.push_back(0); } m_BufferSize = 0; m_IsGenotype = IsGenotype; } void CCHPFileBufferWriter::WriteGenotypeEntry(int target, affxchp::CGenotypeProbeSetResults &entry) { GenotypeBufferEntry bufferEntry; bufferEntry.call = entry.AlleleCall; bufferEntry.confidence = entry.Confidence; bufferEntry.RAS1 = entry.RAS1; bufferEntry.RAS2 = entry.RAS2; bufferEntry.aaCall = entry.pvalue_AA; bufferEntry.abCall = entry.pvalue_AB; bufferEntry.bbCall = entry.pvalue_BB; bufferEntry.noCall = entry.pvalue_NoCall; m_TargetGenotypeEntryBuffers[target].push_back(bufferEntry); m_BufferSize += sizeof(GenotypeBufferEntry); if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CCHPFileBufferWriter::WriteExpressionEntry(int target, affxchp::CExpressionProbeSetResults &entry) { ExpressionBufferEntry bufferEntry; bufferEntry.Detection = entry.Detection; bufferEntry.DetectionPValue = entry.DetectionPValue; bufferEntry.Signal = entry.Signal; bufferEntry.NumPairs = entry.NumPairs; bufferEntry.NumUsedPairs = entry.NumUsedPairs; m_TargetExpressionEntryBuffers[target].push_back(bufferEntry); m_BufferSize += sizeof(ExpressionBufferEntry); if (m_BufferSize > m_MaxBufferSize) { FlushBuffer(); } } void CCHPFileBufferWriter::FlushBuffer() { if(m_BufferSize > 0) { if 
(m_IsGenotype) { for (int target=0; targetsize(); target++) { CCHPFileUpdater updater; updater.OpenCHPFile((*m_CHPFileNames)[target].c_str()); for (int i=0; isize(); target++) { CCHPFileUpdater updater; updater.OpenCHPFile((*m_CHPFileNames)[target].c_str()); for (int i=0; i #include #include // #define MAX_BUFFER_SIZE 5242880 // 5 MB namespace affxchpwriter { class CCHPFileBufferWriter { class GenotypeBufferEntry { public: uint8_t call; float confidence; float RAS1; float RAS2; float aaCall; float abCall; float bbCall; float noCall; }; class ExpressionBufferEntry { public: unsigned char Detection; float DetectionPValue; float Signal; unsigned short NumPairs; unsigned short NumUsedPairs; }; public: /*! Constructor */ CCHPFileBufferWriter(); /*! Destructor */ ~CCHPFileBufferWriter(); /*! Set maximum buffer size */ void SetMaxBufferSize(int MaxBufferSize) { m_MaxBufferSize = MaxBufferSize; } /*! Cleans up memory */ void Cleanup(); /*! Initialize entry buffer writer * @param CHPFileNames Reference to a list of CHP file names. * @param IsGenotype Genotype or Expression CHP file. */ void Initialize(std::vector *CHPFileNames, bool IsGenotype); /*! Write an entry to buffer. If the buffer is full, flush it. * @param target Target for the Signal entry. * @param entry CHP genotype entry. */ void WriteGenotypeEntry(int target, affxchp::CGenotypeProbeSetResults &entry); /*! Write an entry to buffer. If the buffer is full, flush it. * @param target Target for the Signal entry. * @param entry CHP expression entry. */ void WriteExpressionEntry(int target, affxchp::CExpressionProbeSetResults &entry); /*! Write the content of the buffer to CHP files. */ void FlushBuffer(); private: // Pointer to list of CHP file names. std::vector *m_CHPFileNames; // List of targets used for storing genotype entries. std::vector< std::vector > m_TargetGenotypeEntryBuffers; // List of targets used for storing expression entries. 
std::vector< std::vector > m_TargetExpressionEntryBuffers; // Buffer for storing genotype entry row indexes. std::vector m_TargetEntryRowIndexes; // Size of the current buffer in bytes. int m_BufferSize; // Maximum size of the buffer before it gets flushed int m_MaxBufferSize; // Genotype or Expression bool m_IsGenotype; }; } #endif // _CHPFILEBUFFERWRITER_HEADER_ affxparser/src/fusion/file/CHPFileData.cpp0000644000175200017520000010250014516003651021455 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/CHPFileData.h" // #include "file/FileIO.h" // #include "util/Fs.h" // #include #include #include #include #include #include #include // #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif using namespace affxchp; ////////////////////////////////////////////////////////////////////// /*! The CHP file magic number */ #define CHP_FILE_MAGIC_NUMBER 65 /*! The max CHP file version the parser can read */ #define CHP_FILE_VERSION_NUMBER 2 /*! Identifier to indicate absolute expression analysis results stored. */ #define EXPRESSION_ABSOLUTE_STAT_ANALYSIS 2 /*! Identifier to indicate comparison expression analysis results stored. 
*/ #define EXPRESSION_COMPARISON_STAT_ANALYSIS 3 /*! Used to convert floating point values stored as ints in older CHP files. */ #define ROUNDFLOAT 1000 ////////////////////////////////////////////////////////////////////// void CCHPFileHeader::ParseString ( TagValuePairTypeList &tagList, std::string sSource, std::string sDelimiter1, std::string sDelimiter2 ) { std::string buf; TagValuePairType param; std::list delList; std::list delList2; // Skip delimiters at beginning and find first "non-delimiter" (1st delimiter) std::string::size_type lastPos1 = sSource.find_first_not_of(sDelimiter1, 0); std::string::size_type pos1 = sSource.find_first_of(sDelimiter1, lastPos1); // Loop to get tokens while (std::string::npos != pos1 || std::string::npos != lastPos1) { // Get and store all parsed tokens buf = sSource.substr(lastPos1, pos1 - lastPos1); delList.push_back(buf); // Skip delimiters and find next "non-delimiter" (1st delimiter) lastPos1 = sSource.find_first_not_of(sDelimiter1, pos1); pos1 = sSource.find_first_of(sDelimiter1, lastPos1); } // Reverse through stored tokens std::string sTemp; std::list::reverse_iterator rev(delList.end()); std::list::reverse_iterator rev_end(delList.begin()); for (; rev != rev_end; ++rev) { buf = (*rev); // Make sure "buf" has 2nd delimiter, otherwise add to previous "buf" std::string::size_type posTemp = buf.find_first_of(sDelimiter2, 0); if (std::string::npos == posTemp) { // Need to add the 1st delimiter back in the front of the "buf" // Don't know how many delimiters to add, so let's just add one buf = sDelimiter1 + buf; sTemp = buf + sTemp; } else { // Finally store the valid "name=value" token buf += sTemp; delList2.push_back(buf); sTemp = ""; } } // Now lets finally iterate over tokens to parse the "name" and "value" std::list::iterator iter; for (iter=delList2.begin(); iter!=delList2.end(); ++iter) { buf = (*iter); // Skip delimiters at beginning and find first "non-delimiter" (2nd delimiter) std::string::size_type lastPos2 = 
buf.find_first_not_of(sDelimiter2, 0); std::string::size_type pos2 = buf.find_first_of(sDelimiter2, lastPos2); // Validate before assigning "Tag" if (std::string::npos != pos2 || std::string::npos != lastPos2) { param.Tag = buf.substr(lastPos2, pos2 - lastPos2); param.Value = ""; // Skip delimiters and find next "non-delimiter" (2nd delimiter) lastPos2 = buf.find_first_not_of(sDelimiter2, pos2); pos2 = buf.find_first_of(sDelimiter2, lastPos2); // Validate before assigning "Value" if (std::string::npos != pos2 || std::string::npos != lastPos2) param.Value = buf.substr(lastPos2, pos2 - lastPos2); // Add to tag list tagList.push_back(param); } } } ////////////////////////////////////////////////////////////////////// std::string CCHPFileHeader::GetAlgorithmParameter(const char *tag) { std::string value; TagValuePairTypeList::iterator iter; for (iter=m_AlgorithmParameters.begin(); iter!=m_AlgorithmParameters.end(); ++iter) { if (*iter == tag) { value = iter->Value; break; } } return value; } ////////////////////////////////////////////////////////////////////// std::string CCHPFileHeader::GetSummaryParameter(const char *tag) { std::string value; TagValuePairTypeList::iterator iter; for (iter=m_SummaryParameters.begin(); iter!=m_SummaryParameters.end(); ++iter) { if (*iter == tag) { value = iter->Value; break; } } return value; } ////////////////////////////////////////////////////////////////////// BackgroundZoneType CCHPFileHeader::GetBackgroundZone(int x, int y) { BackgroundZoneType zone = {0.0, 0.0, 0.0}; BackgroundZoneTypeList::iterator iter; for (iter=m_BackgroundZoneInfo.zones.begin(); iter!=m_BackgroundZoneInfo.zones.end(); ++iter) { if (iter->centerx == x && iter->centery == y) { zone = (*iter); break; } } return zone; } ////////////////////////////////////////////////////////////////////// CCHPFileHeader::CCHPFileHeader() { Clear(); } ////////////////////////////////////////////////////////////////////// void CCHPFileHeader::Clear() { m_Magic=0; m_Version=0; 
m_Cols=0; m_Rows=0; m_NumProbeSets=0; m_AssayType=Unknown; m_ChipType = ""; m_AlgorithmName = ""; m_AlgorithmVersion = ""; m_ParentCellFile = ""; m_ProgID = ""; m_AlgorithmParameters.erase(m_AlgorithmParameters.begin(), m_AlgorithmParameters.end()); m_SummaryParameters.erase(m_SummaryParameters.begin(), m_SummaryParameters.end()); m_BackgroundZoneInfo.number_zones = 0; m_BackgroundZoneInfo.smooth_factor = 0.0f; m_BackgroundZoneInfo.zones.erase(m_BackgroundZoneInfo.zones.begin(), m_BackgroundZoneInfo.zones.end()); } ////////////////////////////////////////////////////////////////////// CCHPFileHeader::~CCHPFileHeader() { Clear(); } ////////////////////////////////////////////////////////////////////// CCHPFileData::CCHPFileData() { Clear(); } ////////////////////////////////////////////////////////////////////// CCHPFileData::~CCHPFileData() { Clear(); } ////////////////////////////////////////////////////////////////////// void CCHPFileData::Clear() { m_Header.Clear(); m_FileName = ""; m_strError = ""; int n = (int) m_ProbeSetResults.size(); for (int i=0; i CHP_FILE_VERSION_NUMBER) { m_strError = "Unable to read this version of the CHP file."; return false; } // Get the dimensions of the array ReadUInt16_I(instr, usval); m_Header.m_Cols=usval; ReadUInt16_I(instr, usval); m_Header.m_Rows=usval; // Number of probe sets. ReadInt32_I(instr, ival); m_Header.m_NumProbeSets = ival; ReadInt32_I(instr, ival); // no qc data extracted. // Assay type ReadInt32_I(instr, ival); m_Header.m_AssayType = (CCHPFileHeader::GeneChipAssayType)ival; // Prog ID. ReadCString_I(instr, sval); m_Header.m_ProgID = sval; delete[] sval; // Parent cell file. 
ReadCString_I(instr, sval); m_Header.m_ParentCellFile = sval; delete[] sval; // Chip type ReadCString_I(instr, sval); m_Header.m_ChipType = sval; delete[] sval; // Algorithm ReadCString_I(instr, sval); m_Header.m_AlgorithmName = sval; delete[] sval; // Algorithm version ReadCString_I(instr, sval); m_Header.m_AlgorithmVersion = sval; delete[] sval; // Algorithm parameters. int32_t nParams=0; int32_t iParam; ReadInt32_I(instr, nParams); TagValuePairType param; for (iParam=0; iParamDetection = ucval; ReadFloat_I(instr, fval); pResults->DetectionPValue = fval; ReadFloat_I(instr, fval); pResults->Signal = fval; ReadUInt16_I(instr, usval); pResults->NumPairs = usval; ReadUInt16_I(instr, usval); pResults->NumUsedPairs = usval; pResults->m_HasCompResults = false; // Read the comparison data if (analysisType == EXPRESSION_COMPARISON_STAT_ANALYSIS) { pResults->m_HasCompResults = true; ReadUInt8(instr, ucval); pResults->Change = ucval; ReadFloat_I(instr, fval); pResults->ChangePValue = fval; ReadFloat_I(instr, fval); pResults->SignalLogRatio = fval; ReadFloat_I(instr, fval); pResults->SignalLogRatioLow = fval; ReadFloat_I(instr, fval); pResults->SignalLogRatioHigh = fval; ReadUInt16_I(instr, usval); pResults->NumCommonPairs = usval; } } } else if (m_Header.m_AssayType == CCHPFileHeader::Genotyping) { m_ProbeSetResults.resize(m_Header.m_NumProbeSets); const int DM_ALG_RESULT_SIZE = 21; int32_t dataSize=0; ReadInt32_I(instr, dataSize); for (int iset=0; isetAlleleCall = ucval; ReadFloat_I(instr, fval); pResults->Confidence = fval; ReadFloat_I(instr, fval); pResults->RAS1 = fval; pResults->pvalue_AA = fval; ReadFloat_I(instr, fval); pResults->RAS2 = fval; pResults->pvalue_AB = fval; if (dataSize == DM_ALG_RESULT_SIZE) { ReadFloat_I(instr, fval); pResults->pvalue_BB = fval; ReadFloat_I(instr, fval); pResults->pvalue_NoCall = fval; } } } else if (m_Header.m_AssayType == CCHPFileHeader::Universal) { m_ProbeSetResults.resize(m_Header.m_NumProbeSets); int32_t dataSize=0; float bg; 
ReadInt32_I(instr, dataSize); for (int iset=0; isetSetBackground(bg); } } else if (m_Header.m_AssayType == CCHPFileHeader::Resequencing) { int32_t dataSize=0; int8_t call; int32_t pos; uint8_t reason; float score; int index; // Read the data size ReadInt32_I(instr, dataSize); // Read the base calls and scores. m_ReseqResults.Clear(); ReadInt32_I(instr, dataSize); m_ReseqResults.ResizeCalledBases(dataSize); m_ReseqResults.ResizeScores(dataSize); for (index=0; index= 2) { // Read the force calls ReadInt32_I(instr, dataSize); m_ReseqResults.ResizeForceCalls(dataSize); ForceCallType forceCall; for (index=0; indexNumPairs = ival; ReadInt32_I(instr, ival); pResults->NumUsedPairs = ival; if (m_Header.m_Version <= 12) // unused ReadInt32_I(instr, ival); ReadInt32_I(instr, ival); // unused if (m_Header.m_Version == 12) { ReadInt32_I(instr, ival); // unused ReadInt32_I(instr, ival); // unused ReadInt32_I(instr, ival); // unused } ReadFloat_I(instr, fval); pResults->DetectionPValue = fval; if (m_Header.m_Version == 12) { ReadFloat_I(instr, fval); // unused } ReadFloat_I(instr, fval); pResults->Signal = fval; ReadInt32_I(instr, ival); pResults->Detection = ival; // unused int ip; for (ip=0; ipNumPairs; ++ip) { ReadFloat_I(instr, fval); ReadInt32_I(instr, ival); if (m_Header.m_Version == 12) { ReadInt32_I(instr, ival); ReadInt32_I(instr, ival); ReadFloat_I(instr, fval); ReadFloat_I(instr, fval); ReadInt32_I(instr, ival); ReadInt8(instr, cval); ReadInt8(instr, cval); } else { ReadUInt16_I(instr, usval); ReadUInt16_I(instr, usval); } if (m_Header.m_Version == 12) { ReadInt32_I(instr, ival); ReadInt32_I(instr, ival); ReadFloat_I(instr, fval); ReadFloat_I(instr, fval); ReadInt32_I(instr, ival); ReadInt8(instr, cval); ReadInt8(instr, cval); } else { ReadUInt16_I(instr, usval); ReadUInt16_I(instr, usval); } } ReadInt32_I(instr, ival); pResults->m_HasCompResults = (ival == 1 ? 
true : false); if (pResults->m_HasCompResults == true) { ReadInt32_I(instr, ival); pResults->NumCommonPairs = ival; if (m_Header.m_Version == 12) { ReadInt32_I(instr, ival); // unused ReadInt32_I(instr, ival); // unused ReadInt32_I(instr, ival); // unused } ReadInt32_I(instr, ival); pResults->Change = ival; ReadInt8(instr, cval); // unused if (m_Header.m_Version == 12) { ReadInt8(instr, cval); // unused ReadInt32_I(instr, ival); // unused ReadInt32_I(instr, ival); // unused } ReadInt32_I(instr, ival); pResults->SignalLogRatioHigh = (float) ival / ROUNDFLOAT; ReadInt32_I(instr, ival); // unused if (m_Header.m_Version == 12) ReadInt32_I(instr, ival); // unused ReadInt32_I(instr, ival); pResults->SignalLogRatio = (float) ival / ROUNDFLOAT; if (m_Header.m_Version == 12) ReadInt32_I(instr, ival); // unused ReadInt32_I(instr, ival); pResults->SignalLogRatioLow = (float) ival / ROUNDFLOAT; if (m_Header.m_Version == 12) { ReadInt32_I(instr, ival); pResults->ChangePValue = (float) ival / ROUNDFLOAT; } else { ReadFloat_I(instr, fval); pResults->ChangePValue = fval; } } } } else if (m_Header.m_AssayType == CCHPFileHeader::Genotyping) { m_ProbeSetResults.resize(m_Header.m_NumProbeSets); for (int iset=0; isetAlleleCall = ucval; // The confidence if (m_Header.m_Version == 12) { ReadInt32_I(instr, ival); pResults->Confidence = (float) ival / ROUNDFLOAT; } else { ReadFloat_I(instr, fval); pResults->Confidence = fval; } // unused ReadFloat_I(instr, fval); ReadFloat_I(instr, fval); ReadFloat_I(instr, fval); // RAS 1 and 2 ReadFloat_I(instr, fval); pResults->RAS1 = fval; ReadFloat_I(instr, fval); pResults->RAS2 = fval; } else { pResults->Confidence = 0.0f; pResults->RAS1 = 0.0f; pResults->RAS2 = 0.0f; pResults->AlleleCall = ALLELE_NO_CALL; } // 100K results are not stored in this version. 
pResults->pvalue_AA = 0.0f; pResults->pvalue_AB = 0.0f; pResults->pvalue_BB = 0.0f; pResults->pvalue_NoCall = 0.0f; // unused ReadCString_I(instr, sval); delete[] sval; ReadCString_I(instr, sval); delete[] sval; ReadInt32_I(instr, ival); int np = ival; int ip; for (ip=0; ipSetBackground(bg); } // Ignore the probe level data. for (int icell=0; icell= 8) { ReadInt8(instr, unused_8); // unused mask ReadInt8(instr, unused_8); // unused outlier } } } } } } else if (m_Header.m_AssayType == CCHPFileHeader::Resequencing) { int32_t dataSize=0; int32_t i32; int8_t i8; int16_t i16; float score; int index; std::string str; // Read the base calls. m_ReseqResults.Clear(); ReadInt32_I(instr, dataSize); m_ReseqResults.ResizeCalledBases(dataSize); m_ReseqResults.ResizeScores(dataSize); for (index=0; index0) { ReadFixedString(instr, str, i32); ReadInt32_I(instr, i32); if (i32>0) ReadFixedString(instr, str, i32); } for (index=0; index #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxchp { ////////////////////////////////////////////////////////////////////// /*! This class stores a zone's background value */ typedef struct _BackgroundZoneType { /*! The X coordinate of the center of the zone. */ float centerx; /*! The Y coordinate of the center of the zone. */ float centery; /*! The zone's background value */ float background; /*! Assignment operator * @param zn The zone to copy * @return The new zone object */ _BackgroundZoneType operator=(_BackgroundZoneType zn) { centerx = zn.centerx; centery = zn.centery; background = zn.background; return *this; } } BackgroundZoneType; /*! The size of the zone as stored in a CHP file. */ #define ZONE_INFO_TYPE_SIZE (3*sizeof(float)) /*! An STL list of zones */ typedef std::list BackgroundZoneTypeList; /*! Stores a list of zones for the entire array */ typedef struct _BackgroundZoneInfo { /*! The number of zones used in the array */ int number_zones; /*! 
The smoothing factor used to calculate the zone backgrounds */
	float smooth_factor;

	/*! The list of zone background values */
	BackgroundZoneTypeList zones;

} BackgroundZoneInfo;

////////////////////////////////////////////////////////////////////

/*! This class provides storage for the CHP file header.
 *
 * The members are protected; CCHPFileData is a friend and fills them in
 * directly, while other code goes through the accessors below.
 */
class CCHPFileHeader
{
public:
	/*! Constructor */
	CCHPFileHeader();

	/*! Destructor */
	~CCHPFileHeader();

public:
	/*! Defines the assay type for the array */
	typedef enum {Expression, Genotyping, Resequencing, Universal, Unknown} GeneChipAssayType;

protected:
	/*! The magic number in the file */
	int m_Magic;

	/*! The version number in the file */
	int m_Version;

	/*! The number of feature columns in the array */
	unsigned short m_Cols;

	/*! The number of feature rows in the array */
	unsigned short m_Rows;

	/*! The number of probe set results */
	int m_NumProbeSets;

	/*! The type of results stored in the CHP file */
	GeneChipAssayType m_AssayType;

	/*! The chip type or probe array type of the CHP file */
	std::string m_ChipType;

	/*! The name of the algorithm used to create the CHP file */
	std::string m_AlgorithmName;

	/*! The version number of the algorithm used to create the CHP file */
	std::string m_AlgorithmVersion;

	/*! The name of the CEL file used in the creation of the CHP file */
	std::string m_ParentCellFile;

	/*! The programmatic identifier of the algorithm used to create the CHP file */
	std::string m_ProgID;

	/*! The list of algorithm parameters */
	TagValuePairTypeList m_AlgorithmParameters;

	/*! A list of summary parameters generated by the CHP file generating algorithm */
	TagValuePairTypeList m_SummaryParameters;

	/*! The backgrounds for each of the zones (calculated by the expression algorithm) */
	BackgroundZoneInfo m_BackgroundZoneInfo;

	/*! Parses the parameters string into a list given the delimiters.
	 * Parsed pairs are appended to tagList; existing entries are kept.
	 * @param tagList The resulting parameter name/value list.
	 * @param strSource The parameters in a string representation
	 * @param sDelimiter1 The delimiter between each parameter
	 * @param sDelimiter2 The delimiter between the tag and value
	 */
	void ParseString(TagValuePairTypeList &tagList, std::string strSource, std::string sDelimiter1, std::string sDelimiter2);

	/*! Clears the class members */
	void Clear();

	/*! Friend to the parent object */
	friend class CCHPFileData;

public:
	/*! Gets the number of feature columns
	 * @return The number of feature columns
	 */
	int GetCols() const { return m_Cols; }

	/*! Gets the number of feature rows
	 * @return The number of feature rows
	 */
	int GetRows() const { return m_Rows; }

	/*! Gets the number of probe sets
	 * @return The number of probe sets
	 */
	int GetNumProbeSets() const { return m_NumProbeSets; }

	/*! Gets the assay type
	 * @return The assay type
	 */
	GeneChipAssayType GetAssayType() const { return m_AssayType; }

	/*! Gets the chip type
	 * @return The chip type
	 */
	std::string GetChipType() const { return m_ChipType; }

	/*! Gets the algorithm name
	 * @return The algorithm name
	 */
	std::string GetAlgName() const { return m_AlgorithmName; }

	/*! Gets the algorithm version
	 * @return The algorithm version
	 */
	std::string GetAlgVersion() const { return m_AlgorithmVersion; }

	/*! Gets the algorithm parameters
	 * @return A modifiable reference to the list of algorithm parameters
	 */
	TagValuePairTypeList &AlgorithmParameters() { return m_AlgorithmParameters; }

	/*! Gets the summary parameters
	 * @return A modifiable reference to the list of summary parameters
	 */
	TagValuePairTypeList &SummaryParameters() { return m_SummaryParameters; }

	/*! Gets the parent CEL file
	 * @return The parent CEL file
	 */
	std::string GetParentCellFile() const { return m_ParentCellFile; }

	/*! Gets the prog ID
	 * @return The prog ID
	 */
	std::string GetProgID() const { return m_ProgID; }

	/*! Gets a specific algorithm parameter given a name/tag
	 * @param tag The name/tag of the parameter
	 * @return The value of the first parameter matching the tag, or an empty
	 *         string when none matches
	 */
	std::string GetAlgorithmParameter(const char *tag);

	/*! Gets a specific summary parameter given a name/tag
	 * @param tag The name/tag of the parameter
	 * @return The value of the first parameter matching the tag, or an empty
	 *         string when none matches
	 */
	std::string GetSummaryParameter(const char *tag);

	/*! Gets the background zone information
	 * @return The background zone information
	 */
	BackgroundZoneInfo &GetBackgroundZoneInfo() { return m_BackgroundZoneInfo; }

	/*! Gets the list of background zone positions and values
	 * @return The list of background zone positions and values
	 */
	BackgroundZoneTypeList &GetBackgroundZones() { return m_BackgroundZoneInfo.zones; }

	/*! Gets the background zone for a given center coordinate
	 * @param x The X coordinate of the zone center
	 * @param y The Y coordinate of the zone center
	 * @return The zone whose center equals (x, y), or a zone with all fields
	 *         zero when no zone has that center
	 */
	BackgroundZoneType GetBackgroundZone(int x, int y);

	/*! Gets the magic number
	 * @return The magic number
	 */
	int GetMagicNumber() const { return m_Magic; }

	/*! Gets the version number
	 * @return The version number
	 */
	int GetVersionNumber() const { return m_Version; }

	/*! Sets the number of columns
	 * @param n The number of columns (stored as an unsigned short).
	 */
	void SetCols(int n) { m_Cols=n; }

	/*! Sets the number of rows
	 * @param n The number of rows (stored as an unsigned short).
	 */
	void SetRows(int n) { m_Rows=n; }

	/*! Sets the number of probe sets
	 * @param n The number of probe sets.
	 */
	void SetNumProbeSets(int n) { m_NumProbeSets=n; }

	/*! Sets the assay type
	 * @param t The assay type.
	 */
	void SetAssayType(GeneChipAssayType t) { m_AssayType=t; }

	/*! Sets the chip type or probe array type
	 * @param s The chip type or probe array type.
	 */
	void SetChipType(const char *s) { m_ChipType=s; }

	/*! Sets the algorithm name
	 * @param s The algorithm name.
	 */
	void SetAlgName(const char *s) { m_AlgorithmName=s; }

	/*! Sets the algorithm version
	 * @param s The algorithm version.
	 */
	void SetAlgVersion(const char *s) { m_AlgorithmVersion=s; }

	/*! Sets the parent CEL file
	 * @param s The parent CEL file.
	 */
	void SetParentCellFile(const char *s) { m_ParentCellFile=s; }

	/*! 
Sets the prog ID * @param s The programatic identifier */ void SetProgID(const char *s) { m_ProgID=s; } }; //////////////////////////////////////////////////////////////////// /*! Provides a base class for probe set results */ class CProbeSetResults { public: /*! Constructor */ CProbeSetResults() {}; /*! Destructor */ virtual ~CProbeSetResults() {}; }; //////////////////////////////////////////////////////////////////// /*! Present call for expression analysis */ #define ABS_PRESENT_CALL 0 /*! Marginal call for expression analysis */ #define ABS_MARGINAL_CALL 1 /*! Absent call for expression analysis */ #define ABS_ABSENT_CALL 2 /*! No call call for expression analysis */ #define ABS_NO_CALL 3 /*! Increase call for expression comparison analysis */ #define COMP_INCREASE_CALL 1 /*! Decrease call for expression comparison analysis */ #define COMP_DECREASE_CALL 2 /*! Moderate increase call for expression comparison analysis */ #define COMP_MOD_INCREASE_CALL 3 /*! Moderate decrease call for expression comparison analysis */ #define COMP_MOD_DECREASE_CALL 4 /*! No change call for expression comparison analysis */ #define COMP_NO_CHANGE_CALL 5 /*! No call call for expression comparison analysis */ #define COMP_NO_CALL 6 /*! Expression analysis probe set results for the MAS5 algorithm */ class CExpressionProbeSetResults : public CProbeSetResults { public: /*! The detection p-value */ float DetectionPValue; /*! The signal value */ float Signal; /*! The number of probe pairs in the set */ unsigned short NumPairs; /*! The number of probe pairs used to calculate the signal value */ unsigned short NumUsedPairs; /*! The detection call */ unsigned char Detection; /*! Flag indicating that comparison results exist */ bool m_HasCompResults; /*! The change p-value */ float ChangePValue; /*! The signal log ratio */ float SignalLogRatio; /*! The signal log ratio low value */ float SignalLogRatioLow; /*! The signal log ratio high value */ float SignalLogRatioHigh; /*! 
The number of probe pairs in common between the experiment and baseline data */ unsigned short NumCommonPairs; /*! The change call */ unsigned char Change; /*! Returns a string representation of the detection call. * @return The detection call */ std::string GetDetectionString(); /*! Returns a string representation of the change call. * @return The change call */ std::string GetChangeString(); /*! Assignment operator * @param src The object to copy * @return The copied object */ CExpressionProbeSetResults operator=(CExpressionProbeSetResults &src); /*! Constructor */ CExpressionProbeSetResults() { m_HasCompResults = false; } /*! Destructor */ ~CExpressionProbeSetResults() {} }; //////////////////////////////////////////////////////////////////// /*! The AA allele call */ #define ALLELE_A_CALL 6 /*! The BB allele call */ #define ALLELE_B_CALL 7 /*! The AB allele call */ #define ALLELE_AB_CALL 8 /*! The no call allele call */ #define ALLELE_NO_CALL 11 /*! Genotyping analysis probe set results */ class CGenotypeProbeSetResults : public CProbeSetResults { public: /*! The allele call */ unsigned char AlleleCall; /*! The confidence associated with the allele call */ float Confidence; /*! The relative allele strength for the first block */ float RAS1; /*! The relative allele strength for the second block */ float RAS2; /*! The p-value associated with an AA call */ float pvalue_AA; /*! The p-value associated with an AB call */ float pvalue_AB; /*! The p-value associated with an BB call */ float pvalue_BB; /*! The p-value associated with an no call call */ float pvalue_NoCall; /*! Returns a string representation of the allele call. * @return The allele call */ std::string GetAlleleCallString(); /*! Assignment operator * @param src The object to copy * @return The copied object */ CGenotypeProbeSetResults operator=(CGenotypeProbeSetResults &src); /*! Constructor */ CGenotypeProbeSetResults() {Confidence=RAS1=RAS2=pvalue_AA=pvalue_AB=pvalue_BB=pvalue_NoCall=0;} /*! 
Destructor */ ~CGenotypeProbeSetResults() {} }; //////////////////////////////////////////////////////////////////// /*! Universal (tag array) analysis probe set results. */ class CUniversalProbeSetResults : public CProbeSetResults { protected: /*! The background value.*/ float background; public: /*! Gets the background value. * @return The background value. */ float GetBackground() const { return background; } /*! Sets the background value. * @param bg The background value. */ void SetBackground(float bg) { background = bg; } /*! Assignment operator * @param src The object to copy * @return The copied object */ CUniversalProbeSetResults operator=(CUniversalProbeSetResults &src); /*! Constructor */ CUniversalProbeSetResults() { background=0; } /*! Destructor */ ~CUniversalProbeSetResults() {} }; //////////////////////////////////////////////////////////////////// /*! A structure to hold a force call, its position and reason. * * A force call is the call the algorithm would have made if the thresholds * were not applied. */ typedef struct _ForceCallType { /*! The position (index) of the call. */ int position; /*! The force call. */ char call; /*! The reason for the call. */ unsigned char reason; } ForceCallType; /*! The force call was made due to no signal threshold. */ #define NO_SIGNAL_THR_FORCE_CALL 'N' /*! The force call was made due to weak signal threshold. */ #define WEAK_SIGNAL_THR_FORCE_CALL 'W' /*! The force call was made due to saturation level. */ #define SATURATION_LEVEL_FORCE_CALL 'S' /*! The force call was made due to quality score threshold. */ #define QUALITY_SCORE_THR_FORCE_CALL 'Q' /*! The force call was made due to failed both trace and sequence profiles. */ #define TRACE_AND_SEQUENCE_PROFILES_FORCE_CALL 'F' /*! The force call was made due to base reliability threshold. */ #define RELIABILITY_THR_FORCE_CALL 'B' /*! A structure to hold a base call at a given position (index). */ typedef struct _BaseCallType { /*! The position (index) of the call. 
*/ int position; /*! The call. */ char call; } BaseCallType; /*! Resequencing results. */ class CResequencingResults { protected: /*! The called bases. */ std::vector calledBases; /*! Base call scores. */ std::vector scores; /*! An array of force calls - base calls the algorithm would have made if the thresholds were removed. */ std::vector forceCalls; /*! An array of original calls. The calledBases contained the results of the algorithm and user edits. * If a user edits a base the original algorithm called base is stored in this vector. */ std::vector origCalls; public: /*! Constructor */ CResequencingResults() {} /*! Destructor */ ~CResequencingResults() { Clear(); } /*! Clears the members. */ void Clear() { calledBases.clear(); scores.clear(); forceCalls.clear(); origCalls.clear(); } /*! Gets the called bases. * @return The array of called bases. */ const std::vector &GetCalledBases() { return calledBases; } /*! Gets the called base at the given index. * @param index The index to the called bases array. * @return The called base. */ char GetCalledBase(int index) { return calledBases[index]; } /*! Gets the size of the called bases array. * @return The size of the called bases array. */ int GetCalledBasesSize() const { return (int) calledBases.size(); } /*! Resizes the called bases array. * @param size The size of the array. */ void ResizeCalledBases(int size) { calledBases.resize(size); } /*! Sets the called base. * @param index The index to the array. * @param call The call. */ void SetCalledBase(int index, char call) { calledBases[index] = call; } /*! Gets the scores. * @return The array of scores. */ const std::vector &GetScores() { return scores; } /*! Gets the score at the given index. * @param index The index to the scores array. * @return The score. */ float GetScore(int index) { return scores[index]; } /*! Gets the size of the scores array. * @return The size of the scores array. */ int GetScoresSize() const { return (int) scores.size(); } /*! 
Resizes the scores array. * @param size The size of the array. */ void ResizeScores(int size) { scores.resize(size); } /*! Sets the score. * @param index The index to the array. * @param score The score. */ void SetScore(int index, float score) { scores[index] = score; } /*! Gets the force calls. * @return The array of force calls. */ const std::vector &GetForceCalls() { return forceCalls; } /*! Gets the force call at the given index. * @param index The index to the force calls array. * @return The force call. */ ForceCallType GetForceCall(int index) { return forceCalls[index]; } /*! Gets the size of the force calls array. * @return The size of the force calls array. */ int GetForceCallsSize() const { return (int) forceCalls.size(); } /*! Resizes the force calls array. * @param size The size of the array. */ void ResizeForceCalls(int size) { forceCalls.resize(size); } /*! Sets the force call. * @param index The index to the array. * @param call The force call. */ void SetForceCall(int index, ForceCallType call) { forceCalls[index] = call; } /*! Gets the original called bases. * @return The array of original calls. */ const std::vector &GetOrigCalls() { return origCalls; } /*! Gets the original called base at the given index. * @param index The index to the original calls array. * @return The original call. */ BaseCallType GetOrigCall(int index) { return origCalls[index]; } /*! Gets the size of the original calls array. * @return The size of the original calls array. */ int GetOrigCallsSize() const { return (int) origCalls.size(); } /*! Resizes the original calls array. * @param size The size of the array. */ void ResizeOrigCalls(int size) { origCalls.resize(size); } /*! Sets the original call. * @param index The index to the array. * @param call The original call. */ void SetOrigCall(int index, BaseCallType call) { origCalls[index] = call; } }; //////////////////////////////////////////////////////////////////// /*! 
This class provides storage and reading capabilities for CHP files */ class CCHPFileData { public: /*! Constructor */ CCHPFileData(); /*! Destructor */ ~CCHPFileData(); protected: /*! The file header object */ CCHPFileHeader m_Header; /*! The full path of the CHP file */ std::string m_FileName; /*! A string to hold an error message associated with a read operation */ std::string m_strError; /*! The vector of probe set results */ std::vector m_ProbeSetResults; /*! The resequencing results. */ CResequencingResults m_ReseqResults; /*! Opens the file for reading. * @param bReadHeaderOnly Flag to indicate if the header is to be read only. * @return True if successful. */ bool Open(bool bReadHeaderOnly=false); public: /*! Accessors to header. * @return The header data object */ CCHPFileHeader &GetHeader() { return m_Header; } /*! Returns the expression probe set result * @param index The index to the result object of interest. * @return The expression result. */ CExpressionProbeSetResults *GetExpressionResults(int index); /*! Returns the genotyping probe set result * @param index The index to the result object of interest. * @return The genotyping result. */ CGenotypeProbeSetResults *GetGenotypingResults(int index); /*! Returns the universal (tag array) probe set result * @param index The index to the result object of interest. * @return The universal result. */ CUniversalProbeSetResults *GetUniversalResults(int index); /*! Returns the resequencing results. * @return The resequencing results. */ CResequencingResults *GetResequencingResults(); /*! Error string when the read functions fail. * @return A string message describing a read error */ std::string GetError() const { return m_strError; } // Functions to read file. bool Read(); /*! Reads the header of the CHP file * @return True if successful */ bool ReadHeader(); /*! Determines if the file specified by the FileName property exists. * @return True if the file exists. */ bool Exists(); /*! 
Determines if the CHP file specified by the FileName property is an XDA format file * @return True if the file is an XDA file */ bool IsXDACompatibleFile(); bool IsMas5File(); /*! Sets the file name. * @param name The full path to the CHP file */ void SetFileName(const char *name) { m_FileName = name; } /*! Gets the file name. * @return The full path to the CHP file. */ std::string GetFileName() const { return m_FileName; } /*! Deallocates any memory used by the class object */ void Clear(); }; //////////////////////////////////////////////////////////////////// } // namespace //////////////////////////////////////////////////////////////////// #endif // !defined(_CHPFileData_HEADER_) affxparser/src/fusion/file/CHPFileUpdater.cpp0000644000175200017520000000636514516003651022224 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/CHPFileUpdater.h" // #include "file/FileWriter.h" // #include "util/Err.h" #include "util/Util.h" // using namespace std; using namespace affxchp; using namespace affxchpwriter; ////////////////////////////////////////////////////////////////////// #define EXPRESSION_ABSOLUTE_STAT_ANALYSIS 2 ////////////////////////////////////////////////////////////////////// /* * Initialize any needed members. */ CCHPFileUpdater::CCHPFileUpdater() : CCHPFileWriter() { m_CHPFile = NULL; } /* * Clean up. */ CCHPFileUpdater::~CCHPFileUpdater() { CloseCHPFile(); } void CCHPFileUpdater::OpenCHPFile(const char *fileName) { CloseCHPFile(); m_CHPFile = new ofstream(fileName, ios::binary | ios::app); if (!m_CHPFile) { Err::errAbort("CCHPFileUpdater::OpenCHPFile() - Unable to open CHP file for updating: " + ToStr(fileName)); } } void CCHPFileUpdater::UpdateExpressionEntry(int row, unsigned char Detection, float DetectionPValue, float Signal, unsigned short NumPairs, unsigned short NumUsedPairs) { // Save the data size for the first probe set. if (row == 0) { unsigned char analysisType = EXPRESSION_ABSOLUTE_STAT_ANALYSIS; int resultsSize = UCHAR_SIZE + FLOAT_SIZE + FLOAT_SIZE + USHORT_SIZE + USHORT_SIZE; WriteUInt8(*m_CHPFile, analysisType); WriteInt32_I(*m_CHPFile, resultsSize); } // Write the absolute data. 
WriteUInt8(*m_CHPFile, Detection); WriteFloat_I(*m_CHPFile, DetectionPValue); WriteFloat_I(*m_CHPFile, Signal); WriteUInt16_I(*m_CHPFile, NumPairs); WriteUInt16_I(*m_CHPFile, NumUsedPairs); } void CCHPFileUpdater::UpdateGenotypeEntry(int row, uint8_t call, float confidence, float RAS1, float RAS2, float aaCall, float abCall, float bbCall, float noCall) { // Save the data size for the first probe set. if (row == 0) { int resultsSize = UCHAR_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE; WriteInt32_I(*m_CHPFile, resultsSize); } // Write probe set result. WriteUInt8(*m_CHPFile, call); WriteFloat_I(*m_CHPFile, confidence); if (aaCall == 0) { WriteFloat_I(*m_CHPFile, RAS1); } else { WriteFloat_I(*m_CHPFile, aaCall); } if (abCall == 0) { WriteFloat_I(*m_CHPFile, RAS2); } else { WriteFloat_I(*m_CHPFile, abCall); } WriteFloat_I(*m_CHPFile, bbCall); WriteFloat_I(*m_CHPFile, noCall); } void CCHPFileUpdater::CloseCHPFile() { if (m_CHPFile != NULL) { if (m_CHPFile->is_open() == true) { m_CHPFile->close(); } delete m_CHPFile; m_CHPFile = NULL; } } affxparser/src/fusion/file/CHPFileUpdater.h0000644000175200017520000000547114516003651021666 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file CHPFileUpdater.h Provides interfaces to update data in a "Calvin" binary "signal" data file. */ #ifndef _CHPFileUpdater_H_ #define _CHPFileUpdater_H_ // #include "file/CHPFileWriter.h" // #include "portability/affy-base-types.h" // #include // namespace affxchpwriter { /*! Provides interfaces to update data in a "Calvin" binary "signal" data file. * This class assumes that the file has been completely written and that * only existing data groups/sets/rows/cols are being modified. * NOTE(review): the implementation in CHPFileUpdater.cpp opens the file in append mode and only writes at the end of it; confirm that the description above still applies. */ class CCHPFileUpdater : public CCHPFileWriter { public: /*! Constructor. Starts with no CHP file open. */ CCHPFileUpdater(); /*! Destructor. Closes the CHP file if one is still open. */ ~CCHPFileUpdater(); /*! Opens the CHP file for updating. Any file opened earlier is closed first; the file is opened in binary append mode. * @param fileName The name of the CHP file. */ void OpenCHPFile(const char *fileName); /*! Writes one expression entry to the open CHP file. No seek is performed: the data is written at the stream's current (append) position. * @param row The row index. For row 0 the analysis type and the size of one entry are written before the entry itself. * @param Detection The detection call. * @param DetectionPValue The detection p-value. * @param Signal The new signal value. * @param NumPairs The number of probe pairs in the set. * @param NumUsedPairs The number of probe pairs used to calculate the signal value. */ void UpdateExpressionEntry(int row, unsigned char Detection, float DetectionPValue, float Signal, unsigned short NumPairs, unsigned short NumUsedPairs); /*! Writes one genotype entry to the open CHP file. No seek is performed: the data is written at the stream's current (append) position. * @param row The row index. For row 0 the size of one entry is written before the entry itself. * @param call CHP call representation. * @param confidence CHP confidence value. * @param RAS1 CHP RAS1 value. Written only when aaCall is 0. * @param RAS2 CHP RAS2 value. Written only when abCall is 0. * @param aaCall CHP aaCall value. Written in place of RAS1 when it is not 0. * @param abCall CHP abCall value. Written in place of RAS2 when it is not 0. * @param bbCall CHP bbCall value. * @param noCall CHP noCall value. 
*/ void UpdateGenotypeEntry(int row, uint8_t call, float confidence, float RAS1, float RAS2, float aaCall, float abCall, float bbCall, float noCall); /*! Closes the CHP file if one is open and releases the stream. */ void CloseCHPFile(); private: // CHP signal file std::ofstream *m_CHPFile; }; } #endif // _CHPFileUpdater_H_ affxparser/src/fusion/file/CHPFileWriter.cpp0000644000175200017520000003560014516003651022066 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/CHPFileWriter.h" // #include "file/FileWriter.h" // #include #include #include #include // #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif using namespace affxchp; using namespace affxchpwriter; ////////////////////////////////////////////////////////////////////// #define DELIMCHAR 0x14 #define MIN_CELLSTR 4 #define CHP_FILE_MAGIC_NUMBER 65 #define NON_RESEQ_CHP_FILE_VERSION_NUMBER 1 #define RESEQ_CHP_FILE_VERSION_NUMBER 2 #define EXPRESSION_ABSOLUTE_STAT_ANALYSIS 2 #define EXPRESSION_COMPARISON_STAT_ANALYSIS 3 ////////////////////////////////////////////////////////////////////// CCHPFileWriter::CCHPFileWriter() : CCHPFileData() { m_ProbeSetIndex = 0; } 
////////////////////////////////////////////////////////////////////// CCHPFileWriter::~CCHPFileWriter() { Clear(); } ////////////////////////////////////////////////////////////////////// bool CCHPFileWriter::CreateNewFile() { // Open the file. m_strError = ""; m_NewChpFile.open(m_FileName.c_str(), std::ios::out | std::ios::binary); if (!m_NewChpFile) { m_strError = "Unable to open the file."; return false; } return true; } ////////////////////////////////////////////////////////////////////// void CCHPFileWriter::InitializeForWriting(affxcdf::CCDFFileData &cdf, bool allocateMemory) { InitializeForWriting( cdf.GetHeader().GetRows(), cdf.GetHeader().GetCols(), cdf.GetHeader().GetNumProbeSets(), cdf.GetChipType().c_str(), cdf.GetProbeSetType(0), allocateMemory); } ////////////////////////////////////////////////////////////////////// void CCHPFileWriter::InitializeForWriting(int numRows, int numCols, int numProbeSets, const char *chipType, affxcdf::GeneChipProbeSetType probeSetType, bool allocateMemory) { m_ProbeSetIndex = 0; // Set the header values. m_Header.SetCols(numCols); m_Header.SetRows(numRows); m_Header.SetNumProbeSets(numProbeSets); m_Header.SetChipType(chipType); switch (probeSetType) { case affxcdf::UnknownProbeSetType: m_Header.SetAssayType(CCHPFileHeader::Unknown); break; case affxcdf::ExpressionProbeSetType: m_Header.SetAssayType(CCHPFileHeader::Expression); break; case affxcdf::GenotypingProbeSetType: m_Header.SetAssayType(CCHPFileHeader::Genotyping); break; case affxcdf::ResequencingProbeSetType: m_Header.SetAssayType(CCHPFileHeader::Resequencing); break; case affxcdf::TagProbeSetType: m_Header.SetAssayType(CCHPFileHeader::Universal); break; default: m_Header.SetAssayType(CCHPFileHeader::Unknown); break; } // Don't allocate for resequencing. 
if (probeSetType == affxcdf::ResequencingProbeSetType) return; // Allocate memory for probe set results m_ProbeSetResults.clear(); if (allocateMemory == true) { m_ProbeSetResults.resize(numProbeSets); CProbeSetResults *pResults; for (int iset=0; isetTag); WriteCString(m_NewChpFile, iter->Value); } WriteInt32_I(m_NewChpFile, (int) m_Header.SummaryParameters().size()); for (iter=m_Header.SummaryParameters().begin(); iter!=m_Header.SummaryParameters().end(); ++iter) { WriteCString(m_NewChpFile, iter->Tag); WriteCString(m_NewChpFile, iter->Value); } // Write the zone info. WriteInt32_I(m_NewChpFile, m_Header.GetBackgroundZoneInfo().number_zones); WriteFloat_I(m_NewChpFile, m_Header.GetBackgroundZoneInfo().smooth_factor); BackgroundZoneTypeList::iterator start(m_Header.GetBackgroundZoneInfo().zones.begin()); BackgroundZoneTypeList::iterator end(m_Header.GetBackgroundZoneInfo().zones.end()); BackgroundZoneType zone; for (; start != end; ++start) { zone = (*start); WriteFloat_I(m_NewChpFile, start->centerx); WriteFloat_I(m_NewChpFile, start->centery); WriteFloat_I(m_NewChpFile, start->background); } return !m_NewChpFile.fail(); } ////////////////////////////////////////////////////////////////////// bool CCHPFileWriter::SaveExpressionEntry(CExpressionProbeSetResults *pEntry) { // Save the data size for the first probe set. if (m_ProbeSetIndex == 0) { int resultsSize = UCHAR_SIZE + FLOAT_SIZE + FLOAT_SIZE + USHORT_SIZE + USHORT_SIZE; unsigned char analysisType = EXPRESSION_ABSOLUTE_STAT_ANALYSIS; if (pEntry->m_HasCompResults) { analysisType = EXPRESSION_COMPARISON_STAT_ANALYSIS; resultsSize += UCHAR_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE + USHORT_SIZE; } WriteUInt8(m_NewChpFile, analysisType); WriteInt32_I(m_NewChpFile, resultsSize); } // Write the absolute data. 
WriteUInt8(m_NewChpFile, pEntry->Detection); WriteFloat_I(m_NewChpFile, pEntry->DetectionPValue); WriteFloat_I(m_NewChpFile, pEntry->Signal); WriteUInt16_I(m_NewChpFile, pEntry->NumPairs); WriteUInt16_I(m_NewChpFile, pEntry->NumUsedPairs); // Write the comparison data if (pEntry->m_HasCompResults == true) { WriteUInt8(m_NewChpFile, pEntry->Change); WriteFloat_I(m_NewChpFile, pEntry->ChangePValue); WriteFloat_I(m_NewChpFile, pEntry->SignalLogRatio); WriteFloat_I(m_NewChpFile, pEntry->SignalLogRatioLow); WriteFloat_I(m_NewChpFile, pEntry->SignalLogRatioHigh); WriteUInt16_I(m_NewChpFile, pEntry->NumCommonPairs); } ++m_ProbeSetIndex; return !m_NewChpFile.fail(); } ////////////////////////////////////////////////////////////////////// bool CCHPFileWriter::SaveMappingEntry(affxchp::CGenotypeProbeSetResults *pEntry) { // Save the data size for the first probe set. float fval; if (m_ProbeSetIndex == 0) { int resultsSize = UCHAR_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE + FLOAT_SIZE; WriteInt32_I(m_NewChpFile, resultsSize); } // Write probe set result. WriteUInt8(m_NewChpFile, pEntry->AlleleCall); WriteFloat_I(m_NewChpFile, pEntry->Confidence); fval = pEntry->pvalue_AA; if (fval == 0) fval = pEntry->RAS1; WriteFloat_I(m_NewChpFile, fval); fval = pEntry->pvalue_AB; if (fval == 0) fval = pEntry->RAS2; WriteFloat_I(m_NewChpFile, fval); WriteFloat_I(m_NewChpFile, pEntry->pvalue_BB); WriteFloat_I(m_NewChpFile, pEntry->pvalue_NoCall); ++m_ProbeSetIndex; return !m_NewChpFile.fail(); } ////////////////////////////////////////////////////////////////////// bool CCHPFileWriter::SaveUniversalEntry(affxchp::CUniversalProbeSetResults *pEntry) { // Save the data size for the first probe set. 
if (m_ProbeSetIndex == 0) { int resultsSize = FLOAT_SIZE; WriteInt32_I(m_NewChpFile, resultsSize); } WriteFloat_I(m_NewChpFile, pEntry->GetBackground()); ++m_ProbeSetIndex; return !m_NewChpFile.fail(); } ////////////////////////////////////////////////////////////////////// bool CCHPFileWriter::Save() { if (SaveHeader() == false) return false; // Write the probe set data if (m_Header.GetAssayType() == CCHPFileHeader::Expression) { // Set the type of analysis for (int iset=0; iset #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxchpwriter { ////////////////////////////////////////////////////////////////////// /*! This class is used for writing CHP files. */ class CCHPFileWriter : public affxchp::CCHPFileData { public: /*! Constructor */ CCHPFileWriter(); /*! Destructor */ ~CCHPFileWriter(); protected: /*! The output file stream object */ std::ofstream m_NewChpFile; /*! A flag to indicate the first entry is being saved. */ int m_ProbeSetIndex; public: /*! Creates the new file (empty) given the file name. * @return True if successful */ bool CreateNewFile(); /*! Allocates memory for the results objects and prepares the object for writing. * @param cdf The associated CDF file. * @param allocateMemory Allocates memory for the results. */ void InitializeForWriting(affxcdf::CCDFFileData &cdf, bool allocateMemory=true); /*! Allocates memory for the results objects and prepares the object for writing. * @param numRows The number of rows of features on the array. * @param numCols The number of columns of features on the array. * @param numProbeSets The number of probe sets. * @param chipType The chip type or probe array type. * @param probeSetType The type of probe sets in the file. * @param allocateMemory Allocates memory for the results. 
*/ void InitializeForWriting(int numRows, int numCols, int numProbeSets, const char *chipType, affxcdf::GeneChipProbeSetType probeSetType, bool allocateMemory=true); /*! Sets the parent CEL file name. * @param str The parent CEL file name (full path). */ void SetParentCelFileName(const char *str); /*! Sets the prog ID of the algorithm used to create the CHP file data. * @param str The prog ID. */ void SetProgID(const char *str); /*! Sets the algorithm name used to create the CHP file data. * @param str The name of the algorithm. */ void SetAlgorithmName(const char *str); /*! Sets the algorithm version used to create the CHP file data. * @param str The algorithm version. */ void SetAlgorithmVersion(const char *str); /*! Adds a parameter name/value pair to the list of algorithm parameters * @param tag The name of the parameter * @param value The value of the parameter */ void AddAlgorithmParameter(const char *tag, const char *value); /*! Adds a parameter name/value pair to the list of summary parameters * @param tag The name of the parameter * @param value The value of the parameter */ void AddChipSummaryParameter(const char *tag, const char *value); /*! Sets the expression probe set result * @param index The index to the probe set results * @param pEntry A pointer to the results */ void SetExpressionEntry(int index, affxchp::CExpressionProbeSetResults *pEntry); /*! Saves the results to the file. * @param pEntry A pointer to the results. */ bool SaveExpressionEntry(affxchp::CExpressionProbeSetResults *pEntry); /*! Sets the genotyping probe set result * @param index The index to the probe set results * @param pEntry A pointer to the results */ void SetMappingEntry(int index, affxchp::CGenotypeProbeSetResults *pEntry); /*! Saves the results to the file. * @param pEntry A pointer to the results. */ bool SaveMappingEntry(affxchp::CGenotypeProbeSetResults *pEntry); /*! 
Sets the universal probe set result * @param index The index to the probe set results * @param pEntry A pointer to the results */ void SetUniversalEntry(int index, affxchp::CUniversalProbeSetResults *pEntry); /*! Saves the results to the file. * @param pEntry A pointer to the results. */ bool SaveUniversalEntry(affxchp::CUniversalProbeSetResults *pEntry); /*! Sets the resequencing results * @param pResults A pointer to the results */ void SetResequencingResults(affxchp::CResequencingResults *pResults); /*! Saves the results to the file. * @return True if successful */ bool Save(); /*! Saves the results to the file. * @return True if successful */ bool SaveHeader(); /*! Closes the file. * @return True if successful */ bool Close(); /*! Initializes the object which stores the background zone information * @param nZones The number of zones * @param smoothFactor The smooth factor used by the background algorithm. */ void AddBackgroundInfo(int nZones, float smoothFactor); /*! Adds a background zone to the list * @param x The central x coordinate of the background zone. * @param y The central y coordinate of the background zone. * @param value The background value for the zone. */ void AddBackgroundZone(int x, int y, float value); }; //////////////////////////////////////////////////////////////////// } // namespace //////////////////////////////////////////////////////////////////// #endif // !defined(AFX_CHPFILEWRITER_H__A16FCA8B_7B8D_4071_81CD_F6B2BB61A169__INCLUDED_) affxparser/src/fusion/file/CMSFileData.cpp0000644000175200017520000001305714516003651021475 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/CMSFileData.h" // #include "file/IniFile.h" // #include #include #include #include #include #include #include #include // using namespace std; using namespace affxcms; ////////////////////////////////////////////////////////////////////// CCMSFileHeader::CCMSFileHeader() : m_nVersion(CMS_FILE_VERSION) { } ///////////////////////////////////////////////////////////////// CCMSFileData::CCMSFileData() { } ///////////////////////////////////////////////////////////////// CCMSFileData::~CCMSFileData() { } ///////////////////////////////////////////////////////////////// bool CCMSFileData::Read() { // Check if file exists if (Exists() == false) return false; // Open the file if (Open() == false) { Close(); return false; } return true; } ///////////////////////////////////////////////////////////////// bool CCMSFileData::ReadHeader() { // Check if file exists if (Exists() == false) return false; // Open the file if (Open(true) == false) { Close(); return false; } return true; } ///////////////////////////////////////////////////////////////// bool CCMSFileData::Open(bool bReadHeaderOnly) { // First close the file. 
Close(); return ReadFile(bReadHeaderOnly); } ///////////////////////////////////////////////////////////////// void CCMSFileData::Close() { m_arrayList.clear(); m_snpList.clear(); } ///////////////////////////////////////////////////////////////// bool CCMSFileData::ReadFile(bool bReadHeaderOnly) { try { ostringstream strstrm; int iCT=0; string strSection, strKey, strValue, strUnit, strChipType; int nValue=0; int nUnit=0; CIniFile iniFile; // Get file version strValue = iniFile.GetValue(CMS_HEADER_KEY_VERSION, CMS_SECTION_HEADER, m_strFileName); nValue = atoi(strValue.c_str()); m_Header.SetVersion(nValue); // Get assay strValue = iniFile.GetValue(CMS_HEADER_KEY_ASSAY, CMS_SECTION_HEADER, m_strFileName); m_Header.SetAssay(strValue.c_str()); // Get array type count strValue = iniFile.GetValue(CMS_HEADER_KEY_ARRAYCOUNT, CMS_SECTION_HEADER, m_strFileName); nValue = atoi(strValue.c_str()); m_Header.SetArrayCount(nValue); // Get common SNP count strValue = iniFile.GetValue(CMS_HEADER_KEY_SNPCOUNT, CMS_SECTION_HEADER, m_strFileName); nValue = atoi(strValue.c_str()); m_Header.SetSNPCount(nValue); // Stop if just reading the header. 
if (bReadHeaderOnly) return true; // Read data section list of array types strValue = iniFile.GetValue(CMS_DATA_TYPES, CMS_SECTION_DATA, m_strFileName); // Parse array types string::size_type lastPos = strValue.find_first_not_of(CMS_DATA_DEL, 0); string::size_type pos = strValue.find_first_of(CMS_DATA_DEL, lastPos); while (std::string::npos != pos || std::string::npos != lastPos) { // Get and store all parsed tokens strChipType = strValue.substr(lastPos, pos - lastPos); m_arrayList.insert(map::value_type(iCT++, strChipType)); // Skip delimiters and find next "non-delimiter" (1st delimiter) lastPos = strValue.find_first_not_of(CMS_DATA_DEL, pos); pos = strValue.find_first_of(CMS_DATA_DEL, lastPos); } // Loop to get common SNPs for (int iSNP=0; iSNP::value_type(strChipType, nUnit)); } // Skip delimiters and find next "non-delimiter" (1st delimiter) lastPos = strValue.find_first_not_of(CMS_DATA_DEL, pos); pos = strValue.find_first_of(CMS_DATA_DEL, lastPos); } // Insert common SNP structure m_snpList.push_back(snp); } } catch(...) { m_strError = "Unspecified read error."; return false; } return true; } ///////////////////////////////////////////////////////////////// bool CCMSFileData::Exists() { // Find the file stats. struct stat st; if (stat(m_strFileName.c_str(), &st) == 0) { return true; } else { m_strError = "CMS file " + m_strFileName + " doesn't exist"; return false; } } affxparser/src/fusion/file/CMSFileData.h0000644000175200017520000001007014516003651021132 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _CmsFileData_h_ #define _CmsFileData_h_ ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable: 4786) // identifier was truncated in the debug information #endif ////////////////////////////////////////////////////////////////////// // Standard template library #include #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxcms { // File version #define CMS_FILE_VERSION 1 // Section constants #define CMS_SECTION_HEADER "HEADER" #define CMS_SECTION_DATA "DATA" // HEADER section key constants #define CMS_HEADER_KEY_VERSION "Version" #define CMS_HEADER_KEY_ASSAY "Assay" #define CMS_HEADER_KEY_ARRAYCOUNT "ArrayCount" #define CMS_HEADER_KEY_SNPCOUNT "SNPCount" // DATA section key constants #define CMS_DATA_TYPES "Types" #define CMS_DATA_SNP "SNP" #define CMS_DATA_DEL "\t" // Parameter input type enum CMSInputType { CMS_MAPPING=1 }; typedef struct _SNPTypeInfo { std::string snpID; std::map chiptype_to_unit; } SNPTypeInfo; typedef std::list SNPTypeInfoList; ////////////////////////////////////////////////////////////////////// // CMS file header class ////////////////////////////////////////////////////////////////////// class CCMSFileHeader { protected: int m_nVersion; // File version number int m_nArrayCount; // Number of array types int m_nSNPCount; // Number of common SNPs std::string m_strAssay; // Assay 
name public: // Get accessors int GetVersion() const { return m_nVersion; } int GetArrayCount() const { return m_nArrayCount; } int GetSNPCount() const { return m_nSNPCount; } std::string GetAssay() const { return m_strAssay; } // Set accessors void SetVersion(int value) { m_nVersion = value; } void SetArrayCount(int value) { m_nArrayCount = value; } void SetSNPCount(int value) { m_nSNPCount = value; } void SetAssay(const char *value) { m_strAssay = value; } // Constructors CCMSFileHeader(); }; ////////////////////////////////////////////////////////////////////// // Class: CCMSFileData // Description: CMS file class ////////////////////////////////////////////////////////////////////// class CCMSFileData { protected: CCMSFileHeader m_Header; std::string m_strFileName; std::string m_strError; std::map m_arrayList; SNPTypeInfoList m_snpList; bool Open(bool bReadHeaderOnly=false); bool ReadFile(bool bReadHeaderOnly=false); public: // Paths and file name accessors void SetFileName(const char *value) { m_strFileName = value; } std::string GetFileName() const { return m_strFileName; } std::string GetError() const { return m_strError; } // Header accessors CCMSFileHeader &GetHeader() { return m_Header; } // Data accessors std::map &ArrayTypeInformation() { return m_arrayList; } SNPTypeInfoList &SNPInformation() { return m_snpList; } // IO methods bool Read(); bool ReadHeader(); bool Exists(); void Close(); // Constructor/Destructor CCMSFileData(); ~CCMSFileData(); }; //////////////////////////////////////////////////////////////////// } // namespace #endif // _CmsFileData_h_ affxparser/src/fusion/file/DATFileData.cpp0000644000175200017520000001606214516003651021462 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2004 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "file/DATFileData.h" // #include "file/FileIO.h" #include "file/FileWriter.h" // #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif using namespace affxdat; using namespace std; /*! The DAT file magic number. */ #define DAT_FILE_MAGIC_NUMBER 0xFC /*! The size of the header. */ #define DAT_FILE_HEADER_SIZE 512 ////////////////////////////////////////////////////////////////////// CDATFileHeaderData::~CDATFileHeaderData() { } ////////////////////////////////////////////////////////////////////// CDATFileHeaderData::CDATFileHeaderData() { m_nCols = 0; m_nRows = 0; m_MinValue = (uint16_t) -1; m_MaxValue = 0; } ////////////////////////////////////////////////////////////////////// bool CDATFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_FileName.c_str(), &st) == 0); } ////////////////////////////////////////////////////////////////////// bool CDATFileData::Read() { // First close the previously opened file. Close(); // Open the file. m_File.open(m_FileName.c_str(), ios::in|ios::binary); if (!m_File) { return false; } // Check if new type. 
uint8_t magic=0; ReadUInt8(m_File, magic); if (magic != DAT_FILE_MAGIC_NUMBER) { return false; } // Read the rows and columns. uint16_t cols; uint16_t rows; ReadUInt16_I(m_File, cols); ReadUInt16_I(m_File, rows); m_HeaderData.SetRows(rows); m_HeaderData.SetCols(cols); // The total number of pixels. uint32_t total; ReadUInt32_I(m_File, total); // Read the min/max values uint32_t min; uint32_t max; ReadUInt32_I(m_File, min); ReadUInt32_I(m_File, max); m_HeaderData.SetMinValue(min); m_HeaderData.SetMaxValue(max); // Skip the unused entries m_File.seekg(2*sizeof(double) + 9 + 9 + 7 + 7 + 6 + 7 + 4 + 18, ios::cur); // Get the next item from the DAT file. This should include the scanner ID // and the array type. char buf[221]; m_File.read(buf, 220); int len=0; char *endIndex = strstr(buf, ".1sq"); if (endIndex != NULL) { int index = (int)(strlen(buf) - strlen(endIndex)); while (index >= 0 && buf[index] != ' ') { ++len; --index; } char *arrayType = &buf[index+1]; arrayType[len-1] = 0; m_HeaderData.SetArrayType(arrayType); index=0; while (buf[index] != ' ') ++index; buf[index] = 0; m_HeaderData.SetScannerID(buf); } // Skip more data m_File.seekg(2*sizeof(double) + sizeof(int32_t), ios::cur); // Now get the grid. 
int16_t coord; GridCoordinatesType grid; ReadInt16_I(m_File, coord); grid.upperleft.x = (int32_t) coord; ReadInt16_I(m_File, coord); grid.upperleft.y = (int32_t) coord; ReadInt16_I(m_File, coord); grid.upperright.x = (int32_t) coord; ReadInt16_I(m_File, coord); grid.upperright.y = (int32_t) coord; ReadInt16_I(m_File, coord); grid.lowerright.x = (int32_t) coord; ReadInt16_I(m_File, coord); grid.lowerright.y = (int32_t) coord; ReadInt16_I(m_File, coord); grid.lowerleft.x = (int32_t) coord; ReadInt16_I(m_File, coord); grid.lowerleft.y = (int32_t) coord; m_HeaderData.SetGridCorners(grid); return true; } ////////////////////////////////////////////////////////////////////// uint16_t CDATFileData::GetPixel(int x, int y) { uint16_t pixel; int32_t offset = DAT_FILE_HEADER_SIZE + (y*m_HeaderData.GetCols() + x)*sizeof(uint16_t); m_File.seekg(offset, ios::beg); ReadUInt16_I(m_File, pixel); return pixel; } //////////////////////////////////////////////////////////////////////// void CDATFileData::GetPixels(int rowIndex, uint16_t *pixels) { int cols = m_HeaderData.GetCols(); uint16_t pixel; int32_t offset = DAT_FILE_HEADER_SIZE + (rowIndex*cols*sizeof(uint16_t)); m_File.seekg(offset, ios::beg); m_File.read((char *)pixels, sizeof(uint16_t) * cols); for (int i=0; i #include #include // #ifdef sun #include #endif ////////////////////////////////////////////////////////////////////// namespace affxdat { /*! This class provides storage for the header of a GCOS DAT file. */ class CDATFileHeaderData { public: /*! Constructor */ CDATFileHeaderData(); /*! Destructor */ ~CDATFileHeaderData(); protected: /*! The number of pixel columns. */ int32_t m_nCols; /*! The number of pixel rows. */ int32_t m_nRows; /*! The minimum pixel value. */ uint16_t m_MinValue; /*! The maximum pixel value. */ uint16_t m_MaxValue; /*! The grid coordinates. */ GridCoordinatesType m_Grid; /*! The probe array type. */ std::string m_ArrayType; /*! The scanner ID */ std::string m_ScannerID; public: /*! 
Gets the number of pixel columns. * @return The number of pixel columns. */ int32_t GetCols() const { return m_nCols; } /*! Sets the number of pixel columns. * @param cols The number of pixel columns. */ void SetCols(int32_t cols) { m_nCols = cols; } /*! Gets the number of pixel rows. * @return The number of pixel rows. */ int32_t GetRows() const { return m_nRows; } /*! Sets the number of pixel rows. * @param rows The number of pixel rows. */ void SetRows(int32_t rows) { m_nRows = rows; } /*! Gets the minimum pixel value. * @return The minimum pixel value. */ uint16_t GetMinValue() const { return m_MinValue; } /*! Sets the minimum pixel value. * @param val The minimum pixel value. */ void SetMinValue(uint16_t val) { m_MinValue = val; } /*! Gets the maximum pixel value. * @return The maximum pixel value. */ uint16_t GetMaxValue() const { return m_MaxValue; } /*! Sets the maximum pixel value. * @param val The maximum pixel value. */ void SetMaxValue(uint16_t val) { m_MaxValue = val; } /*! Returns the total number of pixels. * @return The total number of pixels. */ int32_t GetNumPixels() const { return m_nCols * m_nRows; } /*! Gets the grid coordinates. * @return The grid coordinates. */ const GridCoordinatesType &GetGridCorners() { return m_Grid; } /*! Sets the grid coordinates. * @param grid The grid coordinates. */ void SetGridCorners(const GridCoordinatesType &grid) { m_Grid = grid; } /*! Gets the probe array type. * @return The probe array type. */ std::string GetArrayType() const { return m_ArrayType; } /*! Sets the array type. * @param arrayType The probe array type. */ void SetArrayType(const char *arrayType) { m_ArrayType = arrayType; } /*! Gets the scanner ID. * @return The scanner ID. */ std::string GetScannerID() const { return m_ScannerID; } /*! Sets the array type. * @param scannerID The scanner ID. */ void SetScannerID(const char *scannerID) { m_ScannerID = scannerID; } }; ////////////////////////////////////////////////////////////////////// /*! 
This class provides parsing and access interfaces to a GCOS DAT file. */ class CDATFileData { public: /*! Constructor */ CDATFileData(); /*! Destructor */ ~CDATFileData(); protected: /*! The full path of the DAT file. */ std::string m_FileName; /*! The DAT file header */ CDATFileHeaderData m_HeaderData; /*! The file handle. */ std::ifstream m_File; public: /*! Sets the full path of the DAT file. * @param str The full path of the DAT file. */ void SetFileName(const char *str) { m_FileName = str; } /*! Gets the full path of the DAT file. * @return The full path of the DAT file. */ std::string GetFileName() const { return m_FileName; } /*! Gets the DAT file header object. * @return The header. */ CDATFileHeaderData &GetHeader() { return m_HeaderData; } /*! Gets row of data. * @param rowIndex The index to the row to extract. * @param pixels An array to store the pixel data. */ void GetPixels(int rowIndex, uint16_t *pixels); /*! Gets row of data. * @param rowIndex The index to the row to extract. * @param rowCount The number of rows to extract. * @param pixels An array to store the pixel data. */ void GetPixels(int rowIndex, int rowCount, uint16_t *pixels); /*! Gets a single pixel value. * @param x The x coordinate. * @param y The y coordinate. * @return The pixel value at the coordinate. */ uint16_t GetPixel(int x, int y); /*! Checks if the file exists. * @return True if exists. */ bool Exists(); /*! Reads the file. * @return True if successful. */ bool Read(); /*! Closes the file when finished accessing it. */ void Close(); /*! Update the grid coordinates in a DAT file. * @param datFile The full path name of the DAT file to update. * @param grid The new grid coordinates. * @return True if successful. */ static bool UpdateGridCorners(const char *datFile, const GridCoordinatesType &grid); /*! Determines if the file is a GCOS DAT file. * @param datFile The full path name of the DAT file to update. * @return True if the file is a GCOS DAT file. 
*/ static bool IsGCOSDATFile(const char *datFile); }; ////////////////////////////////////////////////////////////////////// } // namespace //////////////////////////////////////////////////////////////////// #endif // !defined(_DATFileData_HEADER_) affxparser/src/fusion/file/DttArrayFileReader.cpp0000644000175200017520000000501714516003651023133 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/DttArrayFileReader.h" // #include "file/SAXDttArrayHandlers.h" // #include #include // #include #include // using namespace affymetrix_dttarray; XERCES_CPP_NAMESPACE_USE; /* * Initialize the class. */ DttArrayData::DttArrayData() { } /* * Clear the data. */ DttArrayData::~DttArrayData() { Clear(); } /* * Clear the data. */ void DttArrayData::Clear() { attributes.clear(); arrayType = ""; expName = ""; } /* * Initialize the class. */ DttArrayFileReader::DttArrayFileReader() { } /* * Clear the data. */ DttArrayFileReader::~DttArrayFileReader() { } /* * Read the entire file using the XML SAX parser. 
*/ bool DttArrayFileReader::Read(DttArrayData &arrayData) { arrayData.Clear(); // Initialize the XML4C2 system try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } bool status = false; SAXParser* parser = new SAXParser; parser->setValidationScheme(SAXParser::Val_Never); parser->setLoadExternalDTD(false); parser->setDoNamespaces(false); parser->setDoSchema(false); parser->setValidationSchemaFullChecking(false); SAXArrayHandlers handler(&arrayData); parser->setDocumentHandler(&handler); parser->setErrorHandler(&handler); try { parser->parse(m_FileName.c_str()); int errorCount = parser->getErrorCount(); if (errorCount == 0) { status = true; } } catch (...) { status = false; } delete parser; XMLPlatformUtils::Terminate(); return status; } /* * Check if the file exists. */ bool DttArrayFileReader::Exists() { // Find the file stats. struct stat st; return (stat(m_FileName.c_str(), &st) == 0); } affxparser/src/fusion/file/DttArrayFileReader.h0000644000175200017520000000750714516003651022606 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _DttArrayFileReader_HEADER_ #define _DttArrayFileReader_HEADER_ /*! 
\file DttArrayFileReader.h This file provides interfaces to read an array file generated by the DTT1.1 software. * * The Dtt array file is the MAGE-ML file generated by the DTT 1.1 software. */ #include #include #include // namespace affymetrix_dttarray { /*! The parameter name for the gcos sample name. */ #define GCOS_SAMPLE_NAME_PARAMETER_NAME "GCOS Sample Name" /*! The parameter name for the gcos sample type. */ #define GCOS_SAMPLE_TYPE_PARAMETER_NAME "GCOS Sample Type" /*! The parameter name for the gcos sample project. */ #define GCOS_SAMPLE_PROJECT_PARAMETER_NAME "GCOS Sample Project" /*! A structure to hold an attribute name/value/type. */ typedef struct _AttributeNameValueType { /*! The attribute name. */ std::string name; /*! The attribute value. */ std::string value; /*! The attribute type. */ std::string type; } AttributeNameValueType; /*! An STL list of attributes. */ typedef std::list AttributeNameValueTypeList; /*! This class provides storage of the DTT array file data. */ class DttArrayData { private: /*! The probe array type. */ std::string arrayType; /*! The experiment name. */ std::string expName; /*! The attributes. */ AttributeNameValueTypeList attributes; public: /*! Constructor */ DttArrayData(); /*! Destructor */ ~DttArrayData(); /*! Gets the probe array type name. * @return The probe array type. */ std::string GetArrayType() const { return arrayType; } /*! Sets the probe array type name. * @param aType The array type. */ void SetArrayType(std::string const &aType) { arrayType = aType; } /*! Gets the experiemnt name. * @return The experiment name. */ std::string GetExperimentName() const { return expName; } /*! Sets the experiment name. * @param name The experiment name. */ void SetExperimentName(std::string const &name) { expName = name; } /*! Gets the attributes. * @return The sample and experiemnt attributes. */ AttributeNameValueTypeList &Attributes() { return attributes; } /*! Clears the members. */ void Clear(); }; /*! 
This class provides interfaces to read an array file. */ class DttArrayFileReader { protected: /*! The full path name to the DTT array file. */ std::string m_FileName; public: /*! Constructor */ DttArrayFileReader(); /*! Destructor */ ~DttArrayFileReader(); /*! Gets the file name. * @return The full path of the array file. */ std::string GetFileName() const { return m_FileName; } /*! Sets the file name. * @param file The full path of the array file. */ void SetFileName(const char *file) { m_FileName = file; } /*! Reads the entire contents of the array file. * @param arrayData The array data to read from the file. * @return True if successfully read. */ bool Read(DttArrayData &arrayData); /*! Checks for the existance of the file. * @return True if the file exists. */ bool Exists(); }; }; #endif // _DttArrayFileReader_HEADER_ affxparser/src/fusion/file/EXPFileData.cpp0000644000175200017520000001321014516003651021476 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "file/EXPFileData.h" // #include "file/FileIO.h" // #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif using namespace affxexp; using namespace std; /*! The first line of an EXP file */ #define EXP_HEADER_LINE_1 "Affymetrix GeneChip Experiment Information" /*! The second line of an EXP file */ #define EXP_HEADER_LINE_2 "Version" /*! The sample information section name. */ #define SAMPLE_SECTION_NAME "[Sample Info]" /*! The scanner section name. */ #define SCANNER_SECTION_NAME "[Scanner]" /*! The fluidics section name. */ #define FLUIDICS_SECTION_NAME "[Fluidics]" /*! The array type tag name. */ #define ARRAY_TYPE_TAG "Chip Type" /*! The protocol tag name. */ #define PROTOCOL_TAG "Protocol" /*! The station tag name. */ #define STATION_TAG "Station" /* * Trim white space from left and right */ void TrimWhiteSpaces(std::string& s) { string::size_type n; n = s.find_first_not_of( ' ' ); if( n != std::string::npos ) s = s.substr( n ); n = s.find_last_not_of( ' ' ); if( n == std::string::npos ) s.clear(); else s = s.substr( 0, n+1 ); } /* * Initialize the class to null values */ CEXPFileData::CEXPFileData() { } /* * Clean up any used memory */ CEXPFileData::~CEXPFileData() { Clear(); } /* * Deallocate any memory for all the class members and initialize them back to * null or zero values. */ void CEXPFileData::Clear() { m_strArrayType = ""; m_Scan.clear(); m_Hyb.clear(); m_Sample.clear(); } /* * Check for the file existance */ bool CEXPFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_strFileName.c_str(), &st) == 0); } /* * Read the file contents. 
The first two lines are the header followed by * [Sample Info], [Fluidics] and [Scanner] sections. */ bool CEXPFileData::Read() { // Clear any existing data. Clear(); // Open the file. ifstream instream(m_strFileName.c_str(), ios::in); if (!instream) { Clear(); return false; } // The first two lines are the header. const int LINELENGTH = 1024; char str[LINELENGTH]; ReadNextLine(instream, str, LINELENGTH); if (strncmp(str, EXP_HEADER_LINE_1, strlen(EXP_HEADER_LINE_1)) != 0) { Clear(); return false; } ReadNextLine(instream, str, LINELENGTH); if (strncmp(str, EXP_HEADER_LINE_2, strlen(EXP_HEADER_LINE_2)) != 0) { Clear(); return false; } // The possible sections. typedef enum { NO_SECTION, SAMPLE_SECTION, FLUIDICS_SECTION, SCANNER_SECTION } CurrentSectionType; CurrentSectionType currentSection = NO_SECTION; // The remaining are the sample, fluidics and scanner sections bool captureAll = false; while (!instream.eof()) { ReadNextLine(instream, str, LINELENGTH); // Skip blank lines. if (strlen(str) == 0) continue; // Check for the start of each section. if (strncmp(str, SAMPLE_SECTION_NAME, strlen(SAMPLE_SECTION_NAME)) == 0) { currentSection = SAMPLE_SECTION; continue; } else if (strncmp(str, FLUIDICS_SECTION_NAME, strlen(FLUIDICS_SECTION_NAME)) == 0) { currentSection = FLUIDICS_SECTION; continue; } else if (strncmp(str, SCANNER_SECTION_NAME, strlen(SCANNER_SECTION_NAME)) == 0) { currentSection = SCANNER_SECTION; continue; } // Parse the line into a name/value pair. TagValuePairType param; char *paramStr = strtok(str, "\t"); param.Tag = paramStr; paramStr = strtok(NULL, "\t"); if (paramStr != NULL) param.Value = paramStr; else param.Value = ""; TrimWhiteSpaces(param.Value); // Take everything between the "Protocol" and "Station" tags. These // may include error messages in the tag name with blank values. 
if (currentSection == FLUIDICS_SECTION && param.Tag == PROTOCOL_TAG) captureAll = true; else if (currentSection == FLUIDICS_SECTION && param.Tag == STATION_TAG) captureAll = false; // If the value is blank then skip it. if (param.Value == "" && captureAll == false) continue; // Double check if the "Protocol" tag is blank and skip it. if (param.Tag == PROTOCOL_TAG && param.Value == "") continue; // Check for the special array type line if (currentSection == SAMPLE_SECTION && param.Tag == ARRAY_TYPE_TAG) { m_strArrayType = param.Value; } // Store the parameter into the sample section. else if (currentSection == SAMPLE_SECTION) { m_Sample.push_back(param); } // Store the parameter into the fluidics section. else if (currentSection == FLUIDICS_SECTION) { m_Hyb.push_back(param); } // Store the parameter into the scanner section. else if (currentSection == SCANNER_SECTION) { m_Scan.push_back(param); } } // Close the file. instream.close(); return true; } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/EXPFileData.h0000644000175200017520000000607714516003651021160 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _EXPFileData_HEADER_ #define _EXPFileData_HEADER_ /*! \file EXPFileData.h This file provides reading capaibilities for EXP files. */ ////////////////////////////////////////////////////////////////////// #include "file/TagValuePair.h" // #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxexp { /*! Stores the contents of a EXP file. */ class CEXPFileData { public: /*! Constructor */ CEXPFileData(); /*! Destructor */ ~CEXPFileData(); protected: /*! The name of the EXP file */ std::string m_strFileName; /*! The array type in the EXP file */ std::string m_strArrayType; /*! A list of scan parameters. */ TagValuePairTypeList m_Scan; /*! A list of hyb parameters. */ TagValuePairTypeList m_Hyb; /*! A list of sample parameters. */ TagValuePairTypeList m_Sample; public: /*! Sets the file name. * * @param name The name of the EXP file. */ void SetFileName(const char *name) { m_strFileName = name; } /*! Gets the file name. * * @return The file name. */ std::string GetFileName() const { return m_strFileName; } /*! Gets the array type. * * @return The probe array type in the mask file. */ std::string GetArrayType() const { return m_strArrayType; } /*! Sets the array type. * * @param arrayType The probe array type. */ void SetArrayType(const char *arrayType) { m_strArrayType = arrayType; } /*! Reads the contents of the file. * * @return True if successful */ bool Read(); /*! Checks for the existance of a file. * * @return True if the file exists */ bool Exists(); /*! Clears memory associated with the class */ void Clear(); /*! Gets the list of scan parameters. * @return The list of scan parameters. 
*/ TagValuePairTypeList &GetScanParameters() { return m_Scan; } /*! Gets the list of hyb parameters. * @return The list of hyb parameters. */ TagValuePairTypeList &GetHybParameters() { return m_Hyb; } /*! Gets the list of sample parameters. * @return The list of sample parameters. */ TagValuePairTypeList &GetSampleParameters() { return m_Sample; } }; } // namespace ////////////////////////////////////////////////////////////////////// #endif affxparser/src/fusion/file/FileIO.cpp0000644000175200017520000003622014516003651020565 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #include "windows.h" #endif // #include "file/FileIO.h" // #include "portability/affy-base-types.h" #include "util/Convert.h" #include "util/Err.h" // #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #else #include #include #include #endif // Some machines (sparc) dont support unaligned memory access // The mmaped files are chock full of unaligned accesses. 
// When AFFY_UNALIGNED_IN_SW is defined we will do the // alignment in software. // This feature can be enabled on intel for testing. // This is here to enable this feature automaticly on the sparc and ppc. // they cant do unaligned loads. #ifdef __sparc__ #define AFFY_UNALIGNED_IN_SW #endif #ifdef __POWERPC__ #define AFFY_UNALIGNED_IN_SW #endif using namespace std; void ReadUInt32_I(IFSTREAM& instr, uint32_t& val) { uint32_t v=0; instr.read((char*)&v,sizeof(v)); val=itohl(v); } void ReadInt32_I(IFSTREAM& instr,int32_t& val) { ReadUInt32_I(instr,(uint32_t&)val); } void ReadFloat_I(IFSTREAM& instr,float& val) { ReadUInt32_I(instr,(uint32_t&)val); } // void ReadUInt16_I(IFSTREAM& instr, uint16_t& val) { uint16_t v=0; instr.read((char*)&v,sizeof(v)); val=itohs(v); } void ReadInt16_I(IFSTREAM& instr,int16_t& val) { ReadUInt16_I(instr,(uint16_t&)val); } // No byte swapping needed. void ReadUInt8(IFSTREAM& instr, uint8_t& val) { instr.read((char*)&val,sizeof(val)); } void ReadInt8(IFSTREAM& instr,int8_t& val) { ReadUInt8(instr,(uint8_t&)val); } //==================== // // Network byte order // void ReadUInt32_N(IFSTREAM& instr, uint32_t& val) { uint32_t v=0; instr.read((char*)&v,sizeof(v)); val=ntohl(v); } void ReadInt32_N(IFSTREAM& instr,int32_t& val) { ReadUInt32_N(instr,(uint32_t&)val); } void ReadFloat_N(IFSTREAM& instr,float& val) { ReadUInt32_N(instr,(uint32_t&)val); } // void ReadUInt16_N(IFSTREAM& instr, uint16_t& val) { uint16_t v=0; instr.read((char*)&v,sizeof(v)); val=ntohs(v); } void ReadInt16_N(IFSTREAM& instr, int16_t& val) { ReadUInt16_N(instr,(uint16_t&)val); } // When the old BPMAP files were written, the output // values were mangled in this way: // * Start with a float value. // * convert it to an integer. // * byte-swap it // * write it as a float. // This function reverses the process to get the data. 
void ReadFloatFromOldBPMAP_N(IFSTREAM &instr, float &fval) { type_punned pun; instr.read((char *)&pun.v_uint32,FLOAT_SIZE); #if BYTE_ORDER == BIG_ENDIAN pun.v_uint32=affy_swap32(pun.v_uint32); pun.v_uint32=(uint32_t)pun.v_float; pun.v_uint32=affy_swap32(pun.v_uint32); #else pun.v_uint32=(uint32_t)pun.v_float; pun.v_uint32=ntohl(pun.v_uint32); #endif fval=pun.v_uint32; } //============================== // c char* void ReadFixedCString(IFSTREAM& instr, char* str, uint32_t len) { instr.read(str,len); } void ReadFixedUCString(IFSTREAM& instr, unsigned char* str, uint32_t len) { instr.read((char *)str,len); } void ReadCString_I(IFSTREAM& instr, char*& str) { uint32_t slen; ReadUInt32_I(instr,slen); str=new char[slen+1]; instr.read(str,slen); // ensure null -- this replaces the last char read. str[slen]=0; } void ReadCString_I(IFSTREAM& instr,std::string& str) { uint32_t slen; ReadUInt32_I(instr,slen); str.resize(slen); instr.read(&str[0],slen); } void ReadCharacterArray(IFSTREAM& instr, char* str, uint32_t len) { instr.read(str,len); } // The data file may or may not have the null terminator as // part of the length. Using a STD::STRING function as the buffer to // read directly into may result in the string with a length one more // than is intended. The CHAR * buffer will contain an extra null that // will get removed when copying to the output STD::STRING. void ReadFixedString(IFSTREAM& instr, string& str, uint32_t len) { char *s = new char[len+1]; instr.read(s,len); s[len]=0; str = s; delete[] s; } void ReadString_I(IFSTREAM& instr, string& str) { uint32_t len; ReadUInt32_I(instr,len); ReadFixedString(instr,str,len); } void ReadUIntLenString_I(IFSTREAM& instr, std::string &s) { uint32_t len; ReadUInt32_I(instr, len); ReadFixedString(instr, s, len); } void ReadCString_N(IFSTREAM& instr, char*& str) { uint32_t slen; ReadUInt32_N(instr,slen); str=new char[slen+1]; instr.read(str,slen); // ensure null -- this replaces the last char read. 
str[slen]=0; } void ReadString_N(IFSTREAM& instr, string& str) { uint32_t len; ReadUInt32_N(instr,len); ReadFixedString(instr,str,len); } void ReadUIntLenString_N(IFSTREAM& instr, std::string &s) { uint32_t len; ReadUInt32_N(instr, len); ReadFixedString(instr, s, len); } void ReadNextLine(IFSTREAM& instr,char* line,int len) { // zero the buffer to make debugging a bit better. memset(line,0,len); strcpy(line,""); while (!instr.eof()) { instr.getline(line,len); if (strlen(line)>0) { if (line[strlen(line)-1]=='\r') line[strlen(line)-1]='\0'; if (strlen(line)>0) return; } } } //============================== // // There are two issues here: alignment and byte order. // Byte order can be handled with the "htoi" functions. // Alignment has to be handled with functions. // The "Get*" functions are for reading data from a mem-mapped file. // OLD: Callers of these really should use "itoh*" directly. // Opps! They cant as "itoh" does not handle aligment. // The signed versions int32_t MmGetInt32_I(int32_t* ptr) { return MmGetUInt32_I((uint32_t*)ptr); } int16_t MmGetInt16_I(int16_t* ptr) { return MmGetUInt16_I((uint16_t*)ptr); } int32_t MmGetInt32_N(int32_t* ptr) { return MmGetUInt32_N((uint32_t*)ptr); } int16_t MmGetInt16_N(int16_t* ptr) { return MmGetUInt16_N((uint16_t*)ptr); } int8_t MmGetInt8(int8_t* ptr) { return MmGetUInt8((uint8_t*)ptr); } //========== // If unaligned accesses to memory are allowed, (intel) // use these functions. 
#ifndef AFFY_UNALIGNED_IN_SW //// 32 uint32_t MmGetUInt32_I(uint32_t* ptr) { return itohl(*ptr); } void MmSetUInt32_I(uint32_t* ptr,uint32_t val) { *(uint32_t*)ptr=htoil(val); } // uint32_t MmGetUInt32_N(uint32_t* ptr) { return ntohl(*ptr); } void MmSetUInt32_N(uint32_t* ptr,uint32_t val) { *(uint32_t*)ptr=htonl(val); } //// 16 uint16_t MmGetUInt16_I(uint16_t* ptr) { return itohs(*ptr); } void MmSetUInt16_I(uint16_t* ptr,uint16_t val) { *(uint16_t*)ptr=htois(val); } // uint16_t MmGetUInt16_N(uint16_t* ptr) { return ntohs(*ptr); } void MmSetUInt16_N(uint16_t* ptr,uint16_t val) { *(uint16_t*)ptr=htons(val); } // uint8_t MmGetUInt8(uint8_t* ptr) { return *ptr; // no byte-swapping needed. } void MmSetUInt8(uint8_t* ptr,uint8_t val) { *ptr=val; } // float MmGetFloat_I(float* ptr) { type_punned pun; pun.v_float=*ptr; pun.v_uint32=itohl(pun.v_uint32); return pun.v_float; } void MmSetFloat_I(float* ptr,float val) { type_punned pun; pun.v_float=val; pun.v_uint32=htoil(pun.v_uint32); *ptr=pun.v_float; } float MmGetFloat_N(float* ptr) { type_punned pun; pun.v_float=*ptr; pun.v_uint32=ntohl(pun.v_uint32); return pun.v_float; } void MmSetFloat_N(float* ptr,float val) { type_punned pun; pun.v_float=val; pun.v_uint32=htonl(pun.v_uint32); *ptr=pun.v_float; } // See the notes in float MmGetFloatFromOldBPMAP_N(float *ptr) { float fval; #if BYTE_ORDER == LITTLE_ENDIAN // There isnt any punning going on here. // cast, swap, cast back. 
fval = (float)ntohl((uint32_t)*ptr); #else type_punned pun; pun.v_float=*ptr; pun.v_uint32=affy_swap32(pun.v_uint32); fval=(float)affy_swap32((uint32_t)pun.v_float); #endif return fval; } #endif // We dont have unaligned access to memory, fake it // The conversion from and to little endian is done as part of // the unaligned mem access #ifdef AFFY_UNALIGNED_IN_SW //// 32 uint32_t MmGetUInt32_I(uint32_t* ptr) { uint8_t* cptr=(uint8_t*)ptr; uint32_t val=0; val|=(*cptr++); val|=(*cptr++)<< 8; val|=(*cptr++)<<16; val|=(*cptr++)<<24; return val; } void MmSetUInt32_I(uint32_t* ptr,uint32_t val) { uint8_t* cptr=(uint8_t*)ptr; *cptr++=((val )&0xFF); *cptr++=((val>> 8)&0xFF); *cptr++=((val>>16)&0xFF); *cptr++=((val>>24)&0xFF); } uint32_t MmGetUInt32_N(uint32_t* ptr) { uint8_t* cptr=(uint8_t*)ptr; uint32_t val=0; val|=(*cptr++)<<24; val|=(*cptr++)<<16; val|=(*cptr++)<< 8; val|=(*cptr++); return val; } void MmSetUInt32_N(uint32_t* ptr,uint32_t val) { uint8_t* cptr=(uint8_t*)ptr; // val=htonl(val); // with it in big order *cptr++=((val>>24)&0xFF); *cptr++=((val>>16)&0xFF); *cptr++=((val>> 8)&0xFF); *cptr++=((val )&0xFF); } //// 16 uint16_t MmGetUInt16_I(uint16_t* ptr) { uint8_t* cptr=(uint8_t*)ptr; uint16_t val=0; val|=(*cptr++); val|=(*cptr++)<<8; return val; } void MmSetUInt16_I(uint16_t* ptr,uint16_t val) { uint8_t* cptr=(uint8_t*)ptr; *cptr++=((val )&0xFF); *cptr++=((val>> 8)&0xFF); } uint16_t MmGetUInt16_N(uint16_t* ptr) { uint8_t* cptr=(uint8_t*)ptr; uint16_t val=0; val|=(*cptr++)<< 8; val|=(*cptr++); return val; } void MmSetUInt16_N(uint16_t* ptr,uint16_t val) { uint8_t* cptr=(uint8_t*)ptr; *cptr++=((val>>8)&0xFF); *cptr++=((val )&0xFF); } //// 8 uint8_t MmGetUInt8(uint8_t* ptr) { return *(uint8_t*)ptr; } void MmSetUInt8(uint8_t* ptr,uint8_t val) { *(uint8_t*)ptr=val; } // float MmGetFloat_I(float* ptr) { uint32_t val=MmGetUInt32_I((uint32_t*)ptr); return *((float*)&val); } void MmSetFloat_I(float* ptr,float val) { MmSetUInt32_I((uint32_t*)ptr,*(uint32_t*)&val); } float 
MmGetFloat_N(float* ptr) { uint32_t val=MmGetUInt32_N((uint32_t*)ptr); return *((float*)&val); } float MmGetFloatFromOldBPMAP_N(float *ptr) { float fval; #if BYTE_ORDER == LITTLE_ENDIAN fval = (float) ntohl((uint32_t)MmGetFloat_I(ptr)); #else int i1=*(int *)ptr; i1=affy_swap32(i1); /// @todo this should be type punned. - jhg fval=(float)affy_swap32((uint32_t)(*(float *)&i1)); // cast to float, then deref #endif return fval; } void MmSetFloat_N(float* ptr,float val) { MmSetUInt32_N((uint32_t*)ptr,*(uint32_t*)&val); } #endif ////////// // // FILE* of the above // #ifdef FILEIO_WITH_STDIO void ReadUInt32_I(FILE* file, uint32_t& val) { uint32_t v=0; fread(&v,sizeof(v),1,file); val=itohl(v); } void ReadInt32_I(FILE* file,int32_t& val) { ReadUInt32_I(file,(uint32_t&)val); } void ReadFloat_I(FILE* file,float& val) { ReadUInt32_I(file,(uint32_t&)val); } // void ReadUInt16_I(FILE* file, uint16_t& val) { uint16_t v=0; fread(&v,sizeof(v),1,file); val=itohs(v); } void ReadInt16_I(FILE* file,int16_t& val) { ReadUInt16_I(file,(uint16_t&)val); } //==================== // // No byte swapping needed for bytes // void ReadUInt8(FILE* file, uint8_t& val) { fread((char*)&val,sizeof(val),1,file); } void ReadInt8(FILE* file,int8_t& val) { ReadUInt8(file,(uint8_t&)val); } //==================== // // Network (big-endian) byte order // void ReadUInt32_N(FILE* file, uint32_t& val) { uint32_t v=0; fread(&v,sizeof(v),1,file); val=ntohl(v); } void ReadInt32_N(FILE* file,int32_t& val) { ReadUInt32_N(file,(uint32_t&)val); } void ReadFloat_N(FILE* file,float& val) { ReadUInt32_N(file,(uint32_t&)val); } // void ReadUInt16_N(FILE* file, uint16_t& val) { uint16_t v=0; fread((char*)&v,sizeof(v),1,file); val=ntohs(v); } void ReadInt16_N(FILE* file, int16_t& val) { ReadUInt16_N(file,(uint16_t&)val); } void ReadCString_I(FILE* file, char*& str) { uint32_t slen; ReadUInt32_I(file,slen); str=new char[slen+1]; fread(str,slen,1,file); str[slen]=0; } void ReadCString_I(FILE* file,std::string& str) { 
uint32_t slen; ReadUInt32_I(file,slen); str.resize(slen); fread(&str[0],slen,1,file); } #endif #ifdef FILEIO_WITH_ZLIB void ReadUInt32_I(gzFile gzfile, uint32_t& val) { uint32_t v=0; int cnt=gzread(gzfile,&v,sizeof(v)); if (cnt!=sizeof(v)) { Err::errAbort("ReadUInt32_I: bad read."); } val=itohl(v); // printf("ReadUInt32_I(%p)==%d\n",gzfile,val); } void ReadInt32_I(gzFile gzfile,int32_t& val) { ReadUInt32_I(gzfile,(uint32_t&)val); } void ReadFloat_I(gzFile gzfile,float& val) { ReadUInt32_I(gzfile,(uint32_t&)val); } // void ReadUInt16_I(gzFile gzfile, uint16_t& val) { uint16_t v=0; int cnt=gzread(gzfile,&v,sizeof(v)); if (cnt!=sizeof(v)) { Err::errAbort("ReadUInt16_I: bad read."); } val=itohs(v); } void ReadInt16_I(gzFile gzfile,int16_t& val) { ReadUInt16_I(gzfile,(uint16_t&)val); } //==================== // // No byte swapping needed for bytes // void ReadUInt8(gzFile gzfile, uint8_t& val) { int cnt=gzread(gzfile,(char*)&val,sizeof(val)); if (cnt!=sizeof(val)) { Err::errAbort("ReadUInt8: bad read."); } } void ReadInt8(gzFile gzfile,int8_t& val) { ReadUInt8(gzfile,(uint8_t&)val); } //==================== // // Network (big-endian) byte order // void ReadUInt32_N(gzFile gzfile, uint32_t& val) { uint32_t v=0; int cnt=gzread(gzfile,&v,sizeof(v)); if (cnt!=sizeof(v)) { Err::errAbort("ReadUInt32_N: bad read."); } val=ntohl(v); } void ReadInt32_N(gzFile gzfile,int32_t& val) { ReadUInt32_N(gzfile,(uint32_t&)val); } void ReadFloat_N(gzFile gzfile,float& val) { ReadUInt32_N(gzfile,(uint32_t&)val); } // void ReadUInt16_N(gzFile gzfile, uint16_t& val) { uint16_t v=0; int cnt=gzread(gzfile,&v,sizeof(v)); if (cnt!=sizeof(v)) { Err::errAbort("ReadUInt16_N: bad read."); } val=ntohs(v); } void ReadInt16_N(gzFile gzfile, int16_t& val) { ReadUInt16_N(gzfile,(uint16_t&)val); } void ReadCString_I(gzFile gzfile,std::string& str) { uint32_t slen; ReadUInt32_I(gzfile,slen); str.resize(slen); int cnt=gzread(gzfile,&str[0],slen); if (cnt!=slen) { Err::errAbort("ReadCString_I: bad read. 
("+ToStr(cnt)+"!="+ToStr(slen)+")"); } /* printf("== %3d: '",slen); for (int i=0;i #include #include #include // // //#define FILEIO_WITH_STDIO 1 //#define FILEIO_WITH_ZLIB 1 // #ifdef FILEIO_WITH_ZLIB #include "zlib.h" #endif // Affy uses little-endian byte order for some of their files. // This requires defining a the complements to the normal functions "hton". // These functions are intended to be used in conjunction with // the writer functions in FileWriter.h. The general format of the // function names is: {Read,Write}{UInt,Int,Float}{8,16,32}{N,I} /*! Shuffle operator for 32 bit values * @param x The value to shuffle * @return The shuffled value */ inline uint32_t affy_swap32(uint32_t x) { return ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)); } /*! Shuffle operator for 16 bit values * @param x The value to shuffle * @return The shuffled value */ inline uint16_t affy_swap16(uint16_t x) { return ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8)); } // Do we need to define our "to little-endian" operators? #if BYTE_ORDER == BIG_ENDIAN /*! Changes the 32 bit bit data to little endian order */ #define htoil(x) affy_swap32(x) /*! Changes the 16 bit data to little endian order */ #define htois(x) affy_swap16(x) /*! Changes the 32 bit data little endian to host order */ #define itohl(x) affy_swap32(x) /*! Changes the 16 bit little endian data to host order */ #define itohs(x) affy_swap16(x) #else // the machine is little-endian /*! Identity operators */ #define htoil(x) (x) /*! Identity operators */ #define htois(x) (x) /*! Identity operators */ #define itohl(x) (x) /*! Identity operators */ #define itohs(x) (x) #endif #ifndef IFSTREAM /*! Input STL stream object */ #define IFSTREAM std::istream #endif /*! Reads an unsigned 32 bit integer from a file * @param instr The input file stream * @param val The returned value */ void ReadUInt32_I(IFSTREAM& instr, uint32_t& val); /*! 
Reads a signed 32 bit integer from a file * @param instr The input file stream * @param val The returned value */ void ReadInt32_I(IFSTREAM& instr, int32_t& val); /*! Reads a float from a file * @param instr The input file stream * @param val The returned value */ void ReadFloat_I(IFSTREAM& instr, float& val); /*! Reads an unsigned 16 bit integer from a file * @param instr The input file stream * @param val The returned value */ void ReadUInt16_I(IFSTREAM& instr, uint16_t& val); /*! Reads a signed 16 bit integer from a file * @param instr The input file stream * @param val The returned value */ void ReadInt16_I(IFSTREAM& instr, int16_t& val); /*! Reads a string from a little endian file * @param instr The input file stream * @param str The returned value */ void ReadCString_I(IFSTREAM& instr, char*& str); void ReadCString_I(IFSTREAM& instr,std::string& str); /*! Reads a string from a little endian file * @param instr The input file stream * @param str The returned value */ void ReadString_I(IFSTREAM& instr, std::string& str); /*! Reads a string frome a little endian file where the string length is stored as an unsigned integer. * @param instr The input file stream * @param s The returned value */ void ReadUIntLenString_I(IFSTREAM& instr, std::string &s); /*! Reads an unsigned 8 bit value from a file * @param instr The input file stream * @param val The returned value */ void ReadUInt8(IFSTREAM& instr, uint8_t& val); /*! Reads an 8 bit value from a file * @param instr The input file stream * @param val The returned value */ void ReadInt8(IFSTREAM& instr, int8_t& val); /*! Reads an unsigned 32 bit value from a big endian file * @param instr The input file stream * @param val The returned value */ void ReadUInt32_N(IFSTREAM& instr, uint32_t& val); /*! Reads a signed 32 bit value from a big endian file * @param instr The input file stream * @param val The returned value */ void ReadInt32_N(IFSTREAM& instr, int32_t& val); /*! 
Reads a float value from a big endian file * @param instr The input file stream * @param val The returned value */ void ReadFloat_N(IFSTREAM& instr, float& val); /*! Reads an unsigned 16 bit value from a big endian file * @param instr The input file stream * @param val The returned value */ void ReadUInt16_N(IFSTREAM& instr, uint16_t& val); /*! Reads a signed 16 bit value from a big endian file * @param instr The input file stream * @param val The returned value */ void ReadInt16_N(IFSTREAM& instr, int16_t& val); /*! Reads a string from a big endian file * @param instr The input file stream * @param str The returned value */ void ReadCString_N(IFSTREAM& instr, char*& str); /*! Reads a string from a big endian file * @param instr The input file stream * @param str The returned value */ void ReadString_N(IFSTREAM& instr, std::string& str); /*! Reads a string frome a big endian file where the string length is stored as an unsigned integer. * @param instr The input file stream * @param s The returned value */ void ReadUIntLenString_N(IFSTREAM& instr, std::string &s); /*! Reads a fixed length string from a file. * @param instr The input file stream * @param str The returned string value. * @param len The length of the string to read */ void ReadFixedString(IFSTREAM& instr, std::string& str, uint32_t len); /*! Reads a fixed length string from a file. * @param instr The input file stream * @param str The returned string value. * @param len The length of the string to read */ void ReadFixedCString(IFSTREAM& instr, char* str, uint32_t len); /*! Reads a fixed length string from a file. * @param instr The input file stream * @param str The returned string value. * @param len The length of the string to read. */ void ReadFixedUCString(IFSTREAM& instr, unsigned char* str, uint32_t len); /*! Reads a fixed length array of characters from a file. * @param instr The input file stream * @param str The returned string value. 
* @param len The length of the string to read */ void ReadCharacterArray(IFSTREAM& instr, char* str, uint32_t len); /*! Reads the next line from a text file. * @param instr The input file stream * @param line The returned string (the data must already be allocated) * @param len The maximum length of the line to read. */ void ReadNextLine(IFSTREAM& instr,char* line, int len); /*! This function is for older BPMAP file with incorrectly written float's * The floats were written as ints * @param instr The input file stream. * @param fval The returned float */ void ReadFloatFromOldBPMAP_N(IFSTREAM& instr, float &fval); // #ifdef FILEIO_WITH_STDIO void ReadUInt32_I(FILE* file, uint32_t& val); void ReadInt32_I(FILE* file, int32_t& val); void ReadFloat_I(FILE* file, float& val); void ReadUInt16_I(FILE* file, uint16_t& val); void ReadInt16_I(FILE* file, int16_t& val); void ReadCString_I(FILE* file, char*& str); void ReadCString_I(FILE* file,std::string& str); void ReadString_I(FILE* file, std::string& str); void ReadUIntLenString_I(FILE* file, std::string &s); void ReadUInt8(FILE* file, uint8_t& val); void ReadInt8(FILE* file, int8_t& val); void ReadUInt32_N(FILE* file, uint32_t& val); void ReadInt32_N(IFSTREAM& instr, int32_t& val); void ReadFloat_N(FILE* file, float& val); void ReadUInt16_N(FILE* file, uint16_t& val); void ReadInt16_N(FILE* file, int16_t& val); // unimplemented. 
//void ReadCString_N(FILE* file, char*& str); //void ReadString_N(FILE* file, std::string& str); //void ReadUIntLenString_N(FILE* file, std::string &s); //void ReadFixedString(FILE* file, std::string& str, uint32_t len); //void ReadFixedCString(FILE* file, char* str, uint32_t len); //void ReadFixedUCString(FILE* file, unsigned char* str, uint32_t len); //void ReadCharacterArray(FILE* file, char* str, uint32_t len); //void ReadNextLine(FILE* file,char* line, int len); //void ReadFloatFromOldBPMAP_N(FILE* file, float &fval); #endif // #ifdef FILEIO_WITH_ZLIB void ReadUInt32_I(gzFile gzfile, uint32_t& val); void ReadInt32_I(gzFile gzfile, int32_t& val); void ReadFloat_I(gzFile gzfile, float& val); void ReadUInt16_I(gzFile gzfile, uint16_t& val); void ReadInt16_I(gzFile gzfile, int16_t& val); void ReadCString_I(gzFile gzfile, char*& str); void ReadCString_I(gzFile gzfile,std::string& str); void ReadString_I(gzFile gzfile, std::string& str); void ReadUIntLenString_I(gzFile gzfile, std::string &s); void ReadUInt8(gzFile gzfile, uint8_t& val); void ReadInt8(gzFile gzfile, int8_t& val); void ReadUInt32_N(gzFile gzfile, uint32_t& val); void ReadInt32_N(gzFile gzfile, int32_t& val); void ReadFloat_N(gzFile gzfile, float& val); void ReadUInt16_N(gzFile gzfile, uint16_t& val); void ReadInt16_N(gzFile gzfile, int16_t& val); // unimplemented. //void ReadCString_N(gzFile gzfile, char*& str); //void ReadString_N(gzFile gzfile, std::string& str); //void ReadUIntLenString_N(gzFile gzfile, std::string &s); void ReadFixedString(gzFile gzfile, std::string& str, uint32_t len); void ReadFixedCString(gzFile gzfile, char* str, uint32_t len); //void ReadFixedUCString(gzFile gzfile, unsigned char* str, uint32_t len); //void ReadCharacterArray(gzFile gzfile, char* str, uint32_t len); //void ReadNextLine(gzFile gzfile,char* line, int len); //void ReadFloatFromOldBPMAP_N(gzFile gzfile, float &fval); #endif /*! Gets a 32 bit unsigned int from a little endian data stream. 
* @param ptr A pointer to a little endian data stream * @return The host ordered value */ uint32_t MmGetUInt32_I(uint32_t* ptr); /*! Gets a 32 bit signed int from a little endian data stream. * @param ptr A pointer to a little endian data stream * @return The host ordered value */ int32_t MmGetInt32_I(int32_t* ptr); /*! Gets a 16 bit unsigned int from a little endian data stream. * @param ptr A pointer to a little endian data stream * @return The host ordered value */ uint16_t MmGetUInt16_I(uint16_t* ptr); /*! Gets a 16 bit signed int from a little endian data stream. * @param ptr A pointer to a little endian data stream * @return The host ordered value */ int16_t MmGetInt16_I(int16_t* ptr); /*! Gets a 8 bit unsigned int from a data stream. * @param ptr A pointer to the data stream * @return The host ordered value */ uint8_t MmGetUInt8(uint8_t* ptr); /*! Gets a 8 bit signed int from a data stream. * @param ptr A pointer to the data stream * @return The host ordered value */ int8_t MmGetInt8(int8_t* ptr); /*! Gets a 32 bit unsigned int from a big endian data stream. * @param ptr A pointer to a big endian data stream * @return The host ordered value */ uint32_t MmGetUInt32_N(uint32_t* ptr); /*! Gets a 16 bit unsigned int from a big endian data stream. * @param ptr A pointer to a big endian data stream * @return The host ordered value */ uint16_t MmGetUInt16_N(uint16_t* ptr); /*! Gets a float from a big endian data stream. * @param ptr A pointer to a big endian data stream * @return The host ordered value */ float MmGetFloat_N(float* ptr); /*! Gets a 32 bit signed int from a big endian data stream. * @param ptr A pointer to a big endian data stream * @return The host ordered value */ int32_t MmGetInt32_N(int32_t* ptr); /*! Gets a 16 bit signed int from a big endian data stream. * @param ptr A pointer to a big endian data stream * @return The host ordered value */ int16_t MmGetInt16_N(int16_t* ptr); /*! Gets a float from a little endian data stream. 
* @param ptr A pointer to a little endian data stream * @return The host ordered value */ float MmGetFloat_I(float* ptr); /*! This function is for older BPMAP file with incorrectly written float's * @param ptr A pointer to a little endian data stream * @return The host ordered value */ float MmGetFloatFromOldBPMAP_N(float *ptr); /*! Assigns a 32 bit unsigned value to a little endian data stream. * @param ptr The data stream to set. * @param val The value to store in the data stream. */ void MmSetUInt32_I(uint32_t* ptr,uint32_t val); /*! Assigns a 16 bit unsigned value to a little endian data stream. * @param ptr The data stream to set. * @param val The value to store in the data stream. */ void MmSetUInt16_I(uint16_t* ptr,uint16_t val); /*! Assigns an 8 bit unsigned value to a data stream. * @param ptr The data stream to set. * @param val The value to store in the data stream. */ void MmSetUInt8(uint8_t* ptr,uint8_t val); /*! Assigns a float value to a little endian data stream. * @param ptr The data stream to set. * @param val The value to store in the data stream. */ void MmSetFloat_I(float* ptr,float val); /*! Assigns a 32 bit unsigned value to a big endian data stream. * @param ptr The data stream to set. * @param val The value to store in the data stream. */ void MmSetUInt32_N(uint32_t* ptr,uint32_t val); /*! Assigns a 16 bit unsigned value to a big endian data stream. * @param ptr The data stream to set. * @param val The value to store in the data stream. */ void MmSetUInt16_N(uint16_t* ptr,uint16_t val); /*! Assigns a float value to a big endian data stream. * @param ptr The data stream to set. * @param val The value to store in the data stream. */ void MmSetFloat_N(float* ptr,float val); /*! The size of a 32 bit integer */ #define INT32_SIZE sizeof(int32_t ) /*! The size of a 16 bit integer */ #define INT16_SIZE sizeof(int16_t ) /*! The size of an 8 bit integer */ #define INT8_SIZE sizeof(int8_t ) /*! 
The size of an unsigned 32 bit integer */ #define UINT32_SIZE sizeof(uint32_t) /*! The size of an unsigned 16 bit integer */ #define UINT16_SIZE sizeof(uint16_t) /*! The size of an unsigned 8 bit integer */ #define UINT8_SIZE sizeof(uint8_t ) /*! The size of a floating point number */ #define FLOAT_SIZE sizeof(float) /*! The size of a 32 bit integer */ #define INT_SIZE sizeof(int32_t) // NOTE: LONG should not be used in new code!! Use INT32 or some such. /*! The size of a 32 bit long */ #define LONG_SIZE sizeof(int32_t) /*! The size of a 32 bit unsigned long */ #define ULONG_SIZE sizeof(uint32_t) /*! The size of a 16 bit integer */ #define SHORT_SIZE sizeof(int16_t) /*! The size of a 16 bit unsigned integer */ #define USHORT_SIZE sizeof(uint16_t) /*! The size of a 8 bit character */ #define CHAR_SIZE sizeof(int8_t) /*! The size of a 8 bit unsigned character */ #define UCHAR_SIZE sizeof(uint8_t) #endif // AFFX_FILEIO_H affxparser/src/fusion/file/FileWriter.cpp0000644000175200017520000000745114516003651021536 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #include "windows.h" #endif // #include "file/FileWriter.h" // #include #include #include #include #include #include // #ifndef _MSC_VER #include #include #include #endif using namespace std; // void WriteUInt32_I(OFSTREAM& outstr,uint32_t val) { val = htoil(val); outstr.write((char *)&val,sizeof(val)); } void WriteInt32_I(OFSTREAM& outstr,int32_t val) { WriteUInt32_I(outstr,(uint32_t)val); } // void WriteUInt32_N(OFSTREAM& outstr,uint32_t val) { val = htonl(val); outstr.write((char *)&val,sizeof(val)); } void WriteInt32_N(OFSTREAM& outstr,int32_t val) { WriteUInt32_N(outstr,(uint32_t)val); } // void WriteUInt16_I(OFSTREAM& outstr,uint16_t val) { val = htois(val); outstr.write((char *)&val,sizeof(val)); } void WriteInt16_I(OFSTREAM& outstr,int16_t val) { WriteUInt16_I(outstr,(uint16_t)val); } void WriteUInt16_N(OFSTREAM& outstr,uint16_t val) { val = htons(val); outstr.write((char *)&val,sizeof(val)); } void WriteInt16_N(OFSTREAM& outstr,int16_t val) { WriteUInt16_N(outstr,(uint16_t)val); } // void WriteUInt8(OFSTREAM& outstr,uint8_t val) { outstr.write((char *)&val,sizeof(val)); } void WriteInt8(OFSTREAM& outstr,int8_t val) { WriteUInt8(outstr,(uint8_t)val); } // void WriteFloat_I(OFSTREAM& outstr,float fval) { type_punned pun; pun.v_float=fval; WriteUInt32_I(outstr,pun.v_uint32); } void WriteFloat_N(OFSTREAM& outstr,float fval) { type_punned pun; pun.v_float=fval; WriteUInt32_N(outstr,pun.v_uint32); } // Customers wanted repeatablity with old // text files with only had a single digit after the decimal point. 
void WriteFloatLowPrecision(OFSTREAM& outstr,float fval) { fval=(float)((floor((fval+0.05)*10))/10.0); WriteFloat_I(outstr,fval); } //==================== // normal C strings. void WriteFixedCString(OFSTREAM& outstr,const char* str,size_t outlen) { uint32_t slen=(uint32_t)strlen(str); outstr.write(str,((outlen #include #include // #ifndef OFSTREAM /*! The output stream object */ #define OFSTREAM std::ofstream #endif /*! Writes an unsigned 32 bit int to a little endian file * @param outstr The output file stream * @param val The value to write */ void WriteUInt32_I(OFSTREAM& outstr,uint32_t val); /*! Writes a signed 32 bit int to a little endian file * @param outstr The output file stream * @param val The value to write */ void WriteInt32_I(OFSTREAM& outstr,int32_t val); /*! Writes an unsigned 16 bit int to a little endian file * @param outstr The output file stream * @param val The value to write */ void WriteUInt16_I(OFSTREAM& outstr,uint16_t val); /*! Writes a signed 16 bit int to a little endian file * @param outstr The output file stream * @param val The value to write */ void WriteInt16_I(OFSTREAM& outstr,int16_t val); /*! Writes an unsigned 32 bit int to a big endian file * @param outstr The output file stream * @param val The value to write */ void WriteUInt32_N(OFSTREAM& outstr,uint32_t val); /*! Writes a signed 32 bit int to a big endian file * @param outstr The output file stream * @param val The value to write */ void WriteInt32_N(OFSTREAM& outstr,int32_t val); /*! Writes an unsigned 16 bit int to a big endian file * @param outstr The output file stream * @param val The value to write */ void WriteUInt16_N(OFSTREAM& outstr,uint16_t val); /*! Writes a signed 16 bit int to a big endian file * @param outstr The output file stream * @param val The value to write */ void WriteInt16_N(OFSTREAM& outstr,int16_t val); /*! 
Writes an unsigned 8 bit int to a file * @param outstr The output file stream * @param val The value to write */ void WriteUInt8(OFSTREAM& outstr,uint8_t val); /*! Writes a signed 8 bit int to a file * @param outstr The output file stream * @param val The value to write */ void WriteInt8(OFSTREAM& outstr,int8_t val); /*! Writes a float int to a big endian file * @param outstr The output file stream * @param val The value to write */ void WriteFloat_N(OFSTREAM& outstr,float val); /*! Writes a float int to a little endian file * @param outstr The output file stream * @param val The value to write */ void WriteFloat_I(OFSTREAM& outstr,float val); /*! Writes a float to a little endian file with just 1 digit of precision. * @param outstr The output file stream * @param val The value to write */ void WriteFloatLowPrecision(OFSTREAM& outstr,float val); /*! Writes a string to the output file * @param outstr The output file stream. * @param str The string value to write * @param len The length of the string */ void WriteFixedCString(OFSTREAM& outstr,const char* str,size_t len); /*! Writes a string to a little endian binary file * @param outstr The output file stream. * @param str The string value to write */ void WriteCString(OFSTREAM& outstr,const char* str); void WriteCString(OFSTREAM& outstr,std::string str); /*! Writes a fixed number of bytes of a string to the output file * Pads with nulls if the string is shorter than bytes to write * @param outstr The output file stream. * @param str The string value to write * @param len The length of the string */ void WriteFixedString(OFSTREAM& outstr,const std::string str,size_t len); /*! Writes a string to a little endian binary file * Pads with nulls if the string is shorter than bytes to write * @param outstr The output file stream. * @param str The string value to write */ void WriteString_I(OFSTREAM& outstr,const std::string str); /*! Writes a string to a big endian binary file * @param outstr The output file stream. 
* @param str The string value to write */ void WriteString_N(OFSTREAM& outstr,const std::string str); /*! Writes packed character array to the output file * @param outstr The output file stream. * @param str The string value to write * @param len The length of the string */ void WriteCharacterArray(OFSTREAM& outstr,char* str,size_t len); #ifdef FILEWRITER_USE_OLD_AND_BAD_FUNCTIONS /*! Writes a long to a big endian file - for backwards compability only. Use the newer functions instead */ #define WriteLong(s,v) WriteInt32_N(s,v) /*! Writes an unsigned long to a big endian file - for backwards compability only. Use the newer functions instead */ #define WriteULong(s,v) WriteUInt32_N(s,v) /*! Writes an unsigned char to a file - for backwards compability only. Use the newer functions instead */ #define WriteUChar(s,v) WriteUInt8(s,v) /*! Writes a short to a big endian file - for backwards compability only. Use the newer functions instead */ #define WriteShort(s,v) WriteInt16_N(s,v) /*! Writes an unsigned short to a big endian file - for backwards compability only. Use the newer functions instead */ #define WriteUShort(s,v) WriteUInt16_N(s,v) /*! Writes a float to a big endian file - for backwards compability only. Use the newer functions instead */ #define WriteFloat(s,v) WriteFloat_N(s,v) /*! Writes an int to a little big file - for backwards compability only. Use the newer functions instead */ #define WriteInt(s,v) WriteInt32_N(s,v) /*! Writes an int to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_INT(s,v) WriteInt32_I(s,v) /*! Writes an unsigned int to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_UINT(s,v) WriteUInt32_I(s,v) /*! Writes a long to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_LONG(s,v) WriteInt32_I(s,v) /*! 
Writes an unsigned long to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_ULONG(s,v) WriteUInt32_I(s,v) /*! Writes a short to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_SHORT(s,v) WriteInt16_I(s,v) /*! Writes an int to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_USHORT(s,v) WriteUInt16_I(s,v) /*! Writes a boolean to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_BOOL(s,v) WriteInt8_I(s,v) /*! Writes a char to a file - for backwards compability only. Use the newer functions instead */ #define WRITE_CHAR(s,v) WriteInt8(s,v) /*! Writes an unsigned char to a file - for backwards compability only. Use the newer functions instead */ #define WRITE_UCHAR(s,v) WriteUInt8(s,v) /*! Writes a float to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_FLOAT(s,v) WriteFloat_I(s,v) /*! Writes an low precision float to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_LOW_PRECISION_FLOAT(s,v) WriteFloatLowPrecision(s,v) /*! Writes a string to a little endian file - for backwards compability only. Use the newer functions instead */ #define WRITE_STRING(s,v) WriteCString(s,v.c_str()) #endif // old and bad #endif // AFFY_FILEWRITER_H affxparser/src/fusion/file/GQCFileData.cpp0000644000175200017520000003264714516003651021473 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// //#include #include "file/GQCFileData.h" // #include "calvin_files/utils/src/DateTime.h" #include "calvin_files/utils/src/StringUtils.h" // #include #include #include #include #include #include #include #include #include #include // #include #include #include #include // #define XERCES_STATIC_LIBRARY using namespace std; using namespace affxgqc; using namespace affymetrix_calvin_parameter; using namespace affymetrix_calvin_utilities; XERCES_CPP_NAMESPACE_USE; #define GENERIC_REPORT_FILE_ENCODING "UTF-8" #define GENERIC_REPORT_ELEMENT_NAME "GenericReport" #define NAME_VALUE_PAIRS_ELEMENT_NAME "NameValuePairs" #define NAME_VALUE_PAIR_ELEMENT_NAME "NameValuePair" #define REPORT_RESULTS_ELEMENT_NAME "ReportResults" #define ANALYSIS_PARAMETERS "Analysis Parameters" #define QC_RESULTS "QC Results" #define SAMPLE_SIGNATURE "Sample Signature" #define NAME_ATTRIBUTE "name" #define ROWS_ATTRIBUTE "rows" #define COLS_ATTRIBUTE "cols" #define VALUE_ATTRIBUTE "value" #define TYPE_ATTRIBUTE "type" #define TITLE_ATTRIBUTE "title" #define DATE_ATTRIBUTE "dateCreated" #define REPORT_TYPE "affymetrix-generic-report" #define REPORT_TITLE "Intensity Report" #ifdef _MSC_VER #pragma warning(disable: 4996) // ignore deprecated functions warning #endif /*! Converts a XML character string to string. 
* @param c1 The XML string to convert * @return The character string */ static string XMLChToString(const XMLCh* const c1) { string s; int i=0; while (c1[i] != 0) { s += (char)c1[i]; ++i; } return s; } /*! This class provides utilities for converting native strings to XML strings. * This class is provided for platforms where the XMLCh is not a wchar_t (Mac OSX) */ class XMLChConversion { private: /*! The XML string */ XMLCh *str; /*! converts an int. * @param i The int. */ void convert(int i) { char cstr[64]; sprintf(cstr, "%d", i); convert(cstr); } /*! Converts a 8 bit string. * @param s The 8 bit string. */ void convert(const char *const s) { clear(); int n=(int)strlen(s); str = new XMLCh[n+1]; for (int i=0; i atts; unsigned int len = attributes.getLength(); for (unsigned int index = 0; index < len; index++) { atts[XMLChToString(attributes.getName(index))] = XMLChToString(attributes.getValue(index)); } // Store the results switch (currentElement) { case GENERIC_REPORT: break; case NAME_VALUE_PAIRS: if (atts[NAME_ATTRIBUTE] == ANALYSIS_PARAMETERS) currentParameters = &fileData->AnalysisParameters(); else if (atts[NAME_ATTRIBUTE] == QC_RESULTS) currentParameters = &fileData->QCResults(); else if (atts[NAME_ATTRIBUTE] == SAMPLE_SIGNATURE) currentParameters = &fileData->SampleSignature(); else currentParameters = NULL; break; case NAME_VALUE_PAIR: if (currentParameters != NULL) { ParameterNameValuePair param; param.Name = StringUtils::ConvertMBSToWCS(atts[NAME_ATTRIBUTE]); param.Value = StringUtils::ConvertMBSToWCS(atts[VALUE_ATTRIBUTE]); currentParameters->push_back(param); } break; default: break; } } /*! Called at the end of each element. * @param name The name of the element. */ void endElement(const XMLCh* const name) { MoveCurrentElementBack(name); } }; /* Construct. */ GQCFileData::GQCFileData() { } /* Clear the data. */ GQCFileData::~GQCFileData() { Clear(); } /* Clear the data. 
*/ void GQCFileData::Clear() { qcResults.clear(); sampleSignature.clear(); analysisParameters.clear(); } /* Read the file. Clears any previously loaded data, then parses fileName with a SAX parser configured for no validation, no external DTD, no namespaces and no schema processing; the handler is constructed with this object as its target. Returns true only when parsing completes with an error count of zero. Returns false if XML platform initialization throws, if parsing throws, or if the parser reports errors. NOTE: entries stored before a failure are not cleared again on the failure path. */ bool GQCFileData::Read(const std::string &fileName) { Clear(); // Initialize the XML4C2 system try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } bool status = false; SAXParser* parser = new SAXParser; parser->setValidationScheme(SAXParser::Val_Never); parser->setLoadExternalDTD(false); parser->setDoNamespaces(false); parser->setDoSchema(false); parser->setValidationSchemaFullChecking(false); SAXGenericReportHandlers handler(this); parser->setDocumentHandler(&handler); parser->setErrorHandler(&handler); try { parser->parse(fileName.c_str()); int errorCount = parser->getErrorCount(); if (errorCount == 0) { status = true; } } catch (...) { status = false; } /* the parser is released before the XML platform is terminated */ delete parser; XMLPlatformUtils::Terminate(); return status; } /* Create the root element (GENERIC_REPORT_ELEMENT_NAME) for doc and set its type, title and creation-date attributes; the date is the current date/time. The element is returned without being appended to the document. 
*/ static DOMElement *CreateGenericReportElement(DOMDocument* doc) { DOMElement* element = doc->createElement(ToXMLCh(GENERIC_REPORT_ELEMENT_NAME)); element->setAttribute(ToXMLCh(TYPE_ATTRIBUTE), ToXMLCh(REPORT_TYPE)); element->setAttribute(ToXMLCh(TITLE_ATTRIBUTE), ToXMLCh(REPORT_TITLE)); DateTime now = DateTime::GetCurrentDateTime(); element->setAttribute(ToXMLCh(DATE_ATTRIBUTE), ToXMLCh(now.ToString())); return element; } /* Add the name/value pairs to the document. 
*/ static void AddNameValuePairs(const char *title, ParameterNameValuePairList ¶ms, DOMDocument* doc, DOMElement* parentElement) { if (params.size() == 0) return; DOMElement* element = doc->createElement(ToXMLCh(NAME_VALUE_PAIRS_ELEMENT_NAME)); element->setAttribute(ToXMLCh(NAME_ATTRIBUTE), ToXMLCh(title)); for (ParameterNameValuePairList::iterator it=params.begin(); it!=params.end(); it++) { ParameterNameValuePair &pair = *it; DOMElement* pairElement = doc->createElement(ToXMLCh(NAME_VALUE_PAIR_ELEMENT_NAME)); pairElement->setAttribute(ToXMLCh(NAME_ATTRIBUTE), ToXMLCh(pair.Name)); pairElement->setAttribute(ToXMLCh(VALUE_ATTRIBUTE), ToXMLCh(pair.Value)); element->appendChild(pairElement); } parentElement->appendChild(element); } /* * Add an empty table (required by the DTD). */ static void AddBlankReportTable(DOMDocument* doc, DOMElement* parentElement) { DOMElement* element = doc->createElement(ToXMLCh(REPORT_RESULTS_ELEMENT_NAME)); element->setAttribute(ToXMLCh(NAME_ATTRIBUTE), ToXMLCh("NA")); element->setAttribute(ToXMLCh(ROWS_ATTRIBUTE), ToXMLCh("0")); element->setAttribute(ToXMLCh(COLS_ATTRIBUTE), ToXMLCh("0")); parentElement->appendChild(element); } /* Write the file. */ bool GQCFileData::Write(const std::string &fileName) { // Initialize the XML4C2 system. try { XMLPlatformUtils::Initialize(); } catch (const XMLException&) { return false; } // Create a DOM implementation object and create the document type for it. DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(ToXMLCh(L"LS")); DOMDocument* doc = impl->createDocument(); //doc->setStandalone(true); // Create the serializer. 
DOMLSSerializer *theSerializer = ((DOMImplementationLS*)impl)->createLSSerializer(); DOMLSOutput *theOutputDesc = ((DOMImplementationLS*)impl)->createLSOutput(); //theSerializer->setEncoding(ToXMLCh(GENERIC_REPORT_FILE_ENCODING)); theOutputDesc->setEncoding(ToXMLCh(GENERIC_REPORT_FILE_ENCODING)); // Create the root element DOMElement *rootElement = CreateGenericReportElement(doc); // store the parameters AddNameValuePairs(ANALYSIS_PARAMETERS, analysisParameters, doc, rootElement); AddNameValuePairs(QC_RESULTS, qcResults, doc, rootElement); AddNameValuePairs(SAMPLE_SIGNATURE, sampleSignature, doc, rootElement); // Add an empty table (required by the DTD) AddBlankReportTable(doc, rootElement); // Store the element to the document. doc->appendChild(rootElement); // Write the file. bool status = false; XMLFormatTarget *myFormTarget = new LocalFileFormatTarget(fileName.c_str()); theOutputDesc->setByteStream(myFormTarget); try { theSerializer->write(doc, theOutputDesc); status = true; } catch (...) { status = false; } // Clean up doc->release(); theOutputDesc->release(); theSerializer->release(); delete myFormTarget; XMLPlatformUtils::Terminate(); return status; } affxparser/src/fusion/file/GQCFileData.h0000644000175200017520000000446714516003651021137 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2007 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GQCFileData_HEADER_ #define _GQCFileData_HEADER_ #include "calvin_files/parameter/src/Parameter.h" // #include #include // namespace affxgqc { /*! Used to store information in a GQC file. */ class GQCFileData { public: /*! Constructor */ GQCFileData(); /*! Destructor */ ~GQCFileData(); /*! Clear the data. */ void Clear(); /*! The QC analysis results. */ affymetrix_calvin_parameter::ParameterNameValuePairList &QCResults() { return qcResults; } /*! The sample signature */ affymetrix_calvin_parameter::ParameterNameValuePairList &SampleSignature() { return sampleSignature; } /*! The analysis parameters */ affymetrix_calvin_parameter::ParameterNameValuePairList &AnalysisParameters() { return analysisParameters; } /*! Reads the entire contents of the file. * @param fileName The name of the file to read. * @return true if successful */ bool Read(const std::string &fileName); /*! Write the data to the file. * @param fileName The name of the file to read. * @return true if successful */ bool Write(const std::string &fileName); private: /*! The QC analysis results. */ affymetrix_calvin_parameter::ParameterNameValuePairList qcResults; /*! The sample signature */ affymetrix_calvin_parameter::ParameterNameValuePairList sampleSignature; /*! The analysis parameters */ affymetrix_calvin_parameter::ParameterNameValuePairList analysisParameters; }; }; #endif affxparser/src/fusion/file/GRCFileData.cpp0000644000175200017520000001254614516003651021470 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/GRCFileData.h" // #include "file/FileIO.h" // #include #include #include #include // #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif using namespace affymetrix_grid_control; using namespace affxgrc; using namespace std; ////////////////////////////////////////////////////////////////////// /* * Default constructor. The body is empty: no member is explicitly * initialized here. */ CGRCFileData::CGRCFileData() { } ////////////////////////////////////////////////////////////////////// /* * Destructor. The body is empty: nothing is released explicitly. */ CGRCFileData::~CGRCFileData() { } ////////////////////////////////////////////////////////////////////// /* * Check for the file existence. * Returns true when stat() on the stored file name succeeds (returns 0). */ bool CGRCFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_strFileName.c_str(), &st) == 0); } ////////////////////////////////////////////////////////////////////// /* * Read the file contents, either pre-release or release format. 
*/ bool CGRCFileData::Read(GridControlData &grid) { grid.Clear(); if (IsReleaseFormat() == true) return ReadReleaseFormat(grid); else return ReadPreReleaseFormat(grid); } ////////////////////////////////////////////////////////////////////// /* * Checks the version and magic numbers for release values */ bool CGRCFileData::IsReleaseFormat() { // Read the version and magic ifstream grc(m_strFileName.c_str(), ios::in | ios::binary); uint8_t ver=0; uint8_t magic=0; ReadUInt8(grc, magic); ReadUInt8(grc, ver); grc.close(); return (ver == GRC_VERSION && magic == GRC_MAGIC); } ////////////////////////////////////////////////////////////////////// /* * Read the older pre-release (early access software) GRC file */ bool CGRCFileData::ReadPreReleaseFormat(GridControlData &grid) { // Open the grc file. ifstream grc(m_strFileName.c_str(), ios::in | ios::binary); if (!grc) { return false; } // Read the counts. int32_t i32val; ReadInt32_I(grc, i32val); grid.SetColumns(i32val); ReadInt32_I(grc, i32val); grid.SetRows(i32val); ReadInt32_I(grc, i32val); ReadInt32_I(grc, i32val); grid.ResizeB2(i32val); ReadInt32_I(grc, i32val); grid.ResizeB1(i32val); ReadInt32_I(grc, i32val); grid.ResizeNS(i32val); ReadInt32_I(grc, i32val); m_Version = 0; // Read the unused data. int i; int16_t sval; int lChipSize = grid.GetColumns() * grid.GetRows(); for (i=0; i #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxgrc { /*! The current version of the GRC file */ #define GRC_VERSION 1 /*! The magic number of the GRC file */ #define GRC_MAGIC 129 ////////////////////////////////////////////////////////////////////// /*! This class holds the coordinates of those probes used by the GCAST algorithm. */ class CGRCFileData { public: /*! Constructor */ CGRCFileData(); /*! Destructor */ ~CGRCFileData(); protected: /*! The name of the GRC file */ std::string m_strFileName; /*! The version number of the file */ int32_t m_Version; /*! 
Checks if the file is of a release format * * @return True if release format (not early access) */ bool IsReleaseFormat(); /*! Reads a pre-release (early access) format GRC file. * * @param grid The grid control data to fill from the GRC file. * @return True if successful */ bool ReadPreReleaseFormat(affymetrix_grid_control::GridControlData &grid); /*! Reads a release format GRC file. * * @param grid The grid control data to fill from the GRC file. * @return True if successful */ bool ReadReleaseFormat(affymetrix_grid_control::GridControlData &grid); public: /*! Sets the file name. * @param name The full path to the GRC file. */ void SetFileName (const char *name) { m_strFileName = name; } /*! Gets the file name. * @return The full path to the GRC file. */ const std::string &GetFileName() const { return m_strFileName; } /*! Reads the contents of the file. The grid is cleared first, then the * release or pre-release reader is used depending on IsReleaseFormat(). * @param grid The grid control data to fill from the GRC file. * @return True if successful. */ bool Read(affymetrix_grid_control::GridControlData &grid); /*! Checks for the existence of the file. * @return True if the file exists. */ bool Exists(); }; //////////////////////////////////////////////////////////////////// } // namespace ////////////////////////////////////////////////////////////////////// #endif affxparser/src/fusion/file/GRCFileWriter.cpp0000644000175200017520000001766014516003651022075 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/GRCFileWriter.h" // #include "file/FileWriter.h" // #include // ////////////////////////////////////////////////////////////////////// using namespace affxgrc; using namespace affx1lq; using namespace std; using namespace affymetrix_grid_control; ////////////////////////////////////////////////////////////////////// /*! The probe sequence for a non-synthesized probe */ #define STR_NONSYNTH "!" /*! The B2 probe sequences (25mer) */ #define STR_CTRLB2_25 "CAGCAGTTCTACGATGGCAAGTCCT" /*! The B2 probe sequences (21mer) */ #define STR_CTRLB2_21 "GCAGTTCTACGATGGCAAGTC" /*! The B2 probe sequences Extended */ #define STR_EXTENDB2_11 "CCGTTCAGCAGTACTACGATGGCAA" /*! The B1 probe sequences (25mer) */ #define STR_CTRLB1_25 "AGGACTTGCCATCGTAGAACTGCTG" /*! The B1 probe sequences (21mer) */ #define STR_CTRLB1_21 "GACTTGCCATCGTAGAACTGC" /*! Qualifier for extended B2 */ #define STR_EXTENDB2_QUALIFIER "extend948" /*! The probe length of the B2 extended probes. */ #define STR_EXTENDB2_PROBE_LENGTH 24 ////////////////////////////////////////////////////////////////////// /* * Initialize the class to null values */ CGRCFileWriter::CGRCFileWriter() { } /* * Clean up any used memory */ CGRCFileWriter::~CGRCFileWriter() { } /* * Extract the data from the 1LQ file. */ void CGRCFileWriter::PopulateData(const C1LQFileData &libData) { ParseLibData(libData); } /* * Sets the B1 probe at the given index. */ void CGRCFileWriter::SetB1(int index, int x, int y) { FeatureCoordinate coord; coord.x = x; coord.y = y; if (index < m_Grid.GetNumB1Probes()) m_Grid.SetB1(index, coord); } /* * Sets the B2 probe at the given index. 
*/ void CGRCFileWriter::SetB2(int index, int x, int y) { FeatureCoordinate coord; coord.x = x; coord.y = y; if (index < m_Grid.GetNumB2Probes()) m_Grid.SetB2(index, coord); } /* * Sets the NS probe at the given index. */ void CGRCFileWriter::SetNS(int index, int x, int y) { FeatureCoordinate coord; coord.x = x; coord.y = y; if (index < m_Grid.GetNumNSProbes()) m_Grid.SetNS(index, coord); } /* * Extract the data from the 1LQ file. * If shift mask design then * put the extended B2's in the B1 bin. */ void CGRCFileWriter::ParseLibData(const C1LQFileData &libData) { m_Grid.Clear(); m_Grid.SetColumns(libData.GetNumberColumns()); m_Grid.SetRows(libData.GetNumberRows()); int nNS=0; int nB1=0; int nB2=0; bool bShiftMask=false; const list &entries = libData.GetEntries(); // Determine if shift mask design. for (list::const_iterator it = entries.begin(); bShiftMask == false && it != entries.end(); it++) { DataEntry entry = *it; if ( entry.probe == STR_EXTENDB2_11 || (entry.qualifier == STR_EXTENDB2_QUALIFIER && entry.plength == STR_EXTENDB2_PROBE_LENGTH)) bShiftMask = true; } // Count the number of probes in each bin. for (list::const_iterator it = entries.begin(); it != entries.end(); it++) { DataEntry entry = *it; // Increment the counters for the non-synth probes. if(entry.probe == STR_NONSYNTH) { nNS++; } // Increment B2 if shift mask B2 probe. else if (bShiftMask == true && (entry.qualifier == STR_EXTENDB2_QUALIFIER && entry.plength == STR_EXTENDB2_PROBE_LENGTH)) { nB2++; } // Increment B1 bin if shift mask ext B2 probe. else if (bShiftMask == true && entry.probe == STR_EXTENDB2_11) { nB1++; } // Increment B2 bin if B2 probe. else if (entry.probe == STR_CTRLB2_25 || entry.probe == STR_CTRLB2_21) { nB2++; } // Increment B1 bin if checkerboard design and B1 probe. 
else if (bShiftMask == false && (entry.probe == STR_CTRLB1_25 || entry.probe == STR_CTRLB1_21)) { nB1++; } } // Allocate memory for the arrays m_Grid.ResizeB1(nB1); m_Grid.ResizeB2(nB2); m_Grid.ResizeNS(nNS); // Extract the coordinates from the 1LQ data. int indexB1=0; int indexB2=0; int indexns=0; FeatureCoordinate coord; for (list::const_iterator it = entries.begin(); it != entries.end(); it++) { DataEntry entry = *it; // Store the coordinates. if(entry.probe == STR_NONSYNTH) { coord.x = entry.x; coord.y = entry.y; m_Grid.SetNS(indexns, coord); ++indexns; } else if (bShiftMask == true && (entry.qualifier == STR_EXTENDB2_QUALIFIER) && (entry.plength == STR_EXTENDB2_PROBE_LENGTH)) { coord.x = entry.x; coord.y = entry.y; m_Grid.SetB2(indexB2, coord); ++indexB2; } else if (bShiftMask == true && entry.probe == STR_EXTENDB2_11) { coord.x = entry.x; coord.y = entry.y; m_Grid.SetB1(indexB1, coord); ++indexB1; } else if (entry.probe == STR_CTRLB2_25 || entry.probe == STR_CTRLB2_21) { coord.x = entry.x; coord.y = entry.y; m_Grid.SetB2(indexB2, coord); ++indexB2; } else if (bShiftMask == false && (entry.probe == STR_CTRLB1_25 || entry.probe == STR_CTRLB1_21)) { coord.x = entry.x; coord.y = entry.y; m_Grid.SetB1(indexB1, coord); ++indexB1; } } } /* * Extract the data from the 1LQ file and write the GRC file */ bool CGRCFileWriter::Write(const C1LQFileData &libData, bool opposite /* = false */) { ParseLibData(libData); return WriteFile(opposite); } /* * write the data to a GRC file */ bool CGRCFileWriter::Write(bool opposite /* = false */) { return WriteFile(opposite); } /* * Write the GRC file * If not opposite, print B2 first, then B1 * Otherwise, print B1 first, then B2 */ bool CGRCFileWriter::WriteFile(bool opposite /* = false */) { // Open the grc file. 
ofstream grc(m_strFileName.c_str(), ios::out | ios::binary); if (!grc) { return false; } // Write the magic/version uint8_t ver=GRC_VERSION; uint8_t magic=GRC_MAGIC; WriteUInt8(grc, magic); WriteUInt8(grc, ver); // Write the counts. WriteInt32_N(grc, m_Grid.GetColumns()); WriteInt32_N(grc, m_Grid.GetRows()); if (!opposite) { WriteInt32_N(grc, m_Grid.GetNumB2Probes()); WriteInt32_N(grc, m_Grid.GetNumB1Probes()); } else { WriteInt32_N(grc, m_Grid.GetNumB1Probes()); WriteInt32_N(grc, m_Grid.GetNumB2Probes()); } WriteInt32_N(grc, m_Grid.GetNumNSProbes()); // Write the coordinates. uint16_t sval; int n; if (!opposite) { n=m_Grid.GetNumB2Probes(); for (int i=0; i #include #include #include #include #include // #ifndef _MSC_VER #include #endif /*! The file identifier size */ #define IDENTIFIER_SIZE 8 /*! The file identifier */ #define GRD_FILE_IDENTIFIER "\211GRD\r\n\032\n" /*! The file version expected. */ #define FILE_VERSION 1.0f /*! Use the GRD namespace. */ using namespace affxgrd; ////////////////////////////////////////////////////////////////////// const char affxgrd::SZ_PARENT_DAT_PROP_NAME[]="Parent DAT File"; const char affxgrd::SZ_SCAN_DATE_TIME_PROP_NAME[]= "Scan Date-Time"; const char affxgrd::SZ_SCANNER_ID_PROP_NAME[]="Scanner ID"; void affxgrd_use_SZ_arrays_above_to_supress_warnings() { const char* junk; junk=affxgrd::SZ_PARENT_DAT_PROP_NAME; junk=affxgrd::SZ_SCAN_DATE_TIME_PROP_NAME; junk=affxgrd::SZ_SCANNER_ID_PROP_NAME; } /* * Initialize the header object to zero values. */ CGRDFileHeaderData::CGRDFileHeaderData() { m_Version = 0.0f; m_nCols = 0; m_nRows = 0; m_fFeaturePitchX = 0.0f; m_fFeaturePitchY = 0.0f; m_fFeatureSetbackX = 0.0f; m_fFeatureSetbackY = 0.0f; } /* * Read each of the header sections. 
*/ bool CGRDFileData::ReadHeader(char *pData) { // Initialize the header size m_HeaderData.m_nHeaderBytes = 0; if( ReadFixedHeader( pData ) == false ) return false; if( ReadTagHeader( pData ) == false ) return false; if( ReadOptimumCornerHeader( pData ) == false ) return false; return true; } /* * Read the fixed header section. */ bool CGRDFileData::ReadFixedHeader(char* pData) { // Check that this is a GRD file if( memcmp(pData + m_HeaderData.m_nHeaderBytes, GRD_FILE_IDENTIFIER, IDENTIFIER_SIZE) != 0 ) { m_strError = "File does not match GRD format"; return false; } m_HeaderData.m_nHeaderBytes += IDENTIFIER_SIZE; // Extract version number m_HeaderData.m_Version = MmGetFloat_N((float *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += FLOAT_SIZE; // Extract the remainder of the header. m_HeaderData.m_nRows = MmGetUInt32_N((uint32_t *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += INT32_SIZE; m_HeaderData.m_nCols = MmGetUInt32_N((uint32_t *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += INT32_SIZE; m_HeaderData.m_fFeaturePitchX = MmGetFloat_N((float *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += FLOAT_SIZE; m_HeaderData.m_fFeaturePitchY = MmGetFloat_N((float *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += FLOAT_SIZE; m_HeaderData.m_fFeatureSetbackX = MmGetFloat_N((float *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += FLOAT_SIZE; m_HeaderData.m_fFeatureSetbackY = MmGetFloat_N((float *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += FLOAT_SIZE; return true; } /* * Read the parameter header section. */ bool CGRDFileData::ReadTagHeader(char* pData) { // Extract the number of bytes in the tag header section. 
// uint32_t numBtyes = MmGetUInt32_N((uint32_t *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += INT32_SIZE; // Extract the number of name-value pairs uint32_t numNVP = MmGetUInt32_N((uint32_t *)(pData + m_HeaderData.m_nHeaderBytes)); m_HeaderData.m_nHeaderBytes += INT32_SIZE; m_HeaderData.m_props.clear(); std::string sName; std::string sValue; int32_t len; char *buf; // Extract the name-value pairs for( uint32_t iNVP=0; iNVP #endif #include "portability/affy-base-types.h" // #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxgrd { ////////////////////////////////////////////////////////////////////// /*! The name of the parent DAT file parameter. */ // const char SZ_PARENT_DAT_PROP_NAME[] = "Parent DAT File"; extern const char SZ_PARENT_DAT_PROP_NAME[]; /*! The name of the scan date-time parameter. */ //const char SZ_SCAN_DATE_TIME_PROP_NAME[] = "Scan Date-Time"; extern const char SZ_SCAN_DATE_TIME_PROP_NAME[]; /*! The name of the scanner id parameter. */ // const char SZ_SCANNER_ID_PROP_NAME[] = "Scanner ID"; extern const char SZ_SCANNER_ID_PROP_NAME[]; /*! Floating point coordinates */ struct FCOORD { float fx; /*! The X coordinate */ float fy; /*! The Y coordinate */ }; /*! Coordinates to define a grid */ struct FRECT { FCOORD ul; /*! Upper left coordinate */ FCOORD ur; /*! Upper right coordinate */ FCOORD ll; /*! Lower left coordinate */ FCOORD lr; /*! Lower right coordinate */ }; /*! Name value parameters. */ typedef std::map StrStrMap; /*! An STL vector of grid coordinates. */ typedef std::vector FRECTArray; /*! This class provides storage capabilities for the GRD file header. */ class CGRDFileHeaderData { protected: /*! The file version number. */ float m_Version; /*! The number of columns of features on the array. */ uint32_t m_nCols; /*! The number of rows of features on the array. */ uint32_t m_nRows; /*! The feature pitch in the X direction. 
*/ float m_fFeaturePitchX; /*! The feature pitch in the Y direction. */ float m_fFeaturePitchY; /*! The feature setback in the X direction. */ float m_fFeatureSetbackX; /*! The feature setback in the Y direction. */ float m_fFeatureSetbackY; /*! A map of parameter name/value pairs. */ StrStrMap m_props; /*! The optimized sub-grid coordinates, one entry per sub-grid. */ FRECTArray m_optSubgridCoords; /*! The size of the header in bytes. */ uint32_t m_nHeaderBytes; /*! Friend to the high level file class. This allows the class * to set the protected members thus making this a read-only * class. */ friend class CGRDFileData; public: /*! Constructor */ CGRDFileHeaderData(); /*! Gets the version number of the file. * @return The version number of the file. */ float GetVersion() const { return m_Version; } /*! Sets the version number of the file. * @param value The version number of the file. */ void SetVersion(float value) { m_Version = value; } /*! Gets the number of columns of features on the array. * @return The number of columns of features on the array. */ uint32_t GetCols() const { return m_nCols; } /*! Sets the number of columns of features on the array. * @param value The number of columns of features on the array. */ void SetCols(uint32_t value) { m_nCols = value; } /*! Gets the number of rows of features on the array. * @return The number of rows of features on the array. */ uint32_t GetRows() const { return m_nRows; } /*! Sets the number of rows of features on the array. * @param value The number of rows of features on the array. */ void SetRows(uint32_t value) { m_nRows = value; } /*! Gets the number of features on the array. * @return The number of features on the array (rows times columns, computed as a 32-bit unsigned product). */ uint32_t GetNumCells() const { return m_nRows*m_nCols; } /*! Gets the feature pitch in the X direction. * @return The feature pitch in the X direction. 
*/ float GetFeaturePitchX() const { return m_fFeaturePitchX; } /*! Sets the feature pitch in the X direction. 
* @param value The feature pitch in the X direction. */ void SetFeaturePitchX(float value) { m_fFeaturePitchX = value; } /*! Gets the feature pitch in the Y direction. * @return The feature pitch in the Y direction. */ float GetFeaturePitchY() const { return m_fFeaturePitchY; } /*! Sets the feature pitch in the Y direction. * @param value The feature pitch in the Y direction. */ void SetFeaturePitchY(float value) { m_fFeaturePitchY = value; } /*! Gets the feature setback in the X direction. * @return The feature setback in the X direction. */ float GetFeatureSetbackX() const { return m_fFeatureSetbackX; } /*! Sets the feature setback in the X direction. * @param value The feature setback in the X direction. */ void SetFeatureSetbackX(float value) { m_fFeatureSetbackX = value; } /*! Gets the feature setback in the Y direction. * @return The feature setback in the Y direction. */ float GetFeatureSetbackY() const { return m_fFeatureSetbackY; } /*! Sets the feature setback in the Y direction. * @param value The feature setback in the Y direction. */ void SetFeatureSetbackY(float value) { m_fFeatureSetbackY = value; } /*! Gets the number of sub-grids. * @return The number of sub-grids. */ uint32_t GetNumSubgrids() const { return (uint32_t) m_optSubgridCoords.size(); } /*! Gets the optimized coordinates of a sub-grid. * @param nIndex The index to the sub-grid of interest. * @return The sub-grid. */ FRECT GetOptSubgrid(int nIndex) { return m_optSubgridCoords[nIndex]; } /*! Add the optimized coordinates of a sub-grid. * @param subgrid The optimized subgrid coordinates. * @return Returns true if the subgrid was successfully added. */ bool AddOptSubgrid(FRECT* subgrid) { if (subgrid == NULL) return false; m_optSubgridCoords.push_back(*subgrid); return true;} /*! Remove all optimized subgrid coordinates. */ void RemoveOptSubgrids() { m_optSubgridCoords.clear(); } /*! Gets the parameter name/value pairs as a map. * @return The parameters. 
*/ StrStrMap &GetParameters() { return m_props; } }; /*! This class provides parsing and storage capabilities for the GRD files. */ class CGRDFileData { public: /*! Constructor */ CGRDFileData(); /*! Destructor */ ~CGRDFileData(); /*! Sets the file name. * @param str The full path to the GRD file. */ void SetFileName(const char *str) { m_FileName = str; } /*! Gets the file name. * @return The full path of the GRD file. */ std::string GetFileName() const { return m_FileName; } /*! Gets the error message associated with a read error. * @return The error message. */ std::string GetError() const { return m_strError; } /*! Gets the header. * @return The header object. */ CGRDFileHeaderData &GetHeader() { return m_HeaderData; } /*! Gets the center position for the given feature. * @param x The feature X coordinate. * @param y The feature Y coordinate. * @return The center position. */ FCOORD GetCenter( int x, int y ); /*! Checks if the file exists. * @return True if exists. */ bool Exists(); /*! Reads the contents of the GRD file. * @return True if successful. */ bool Read(); /*! Closes the file. The data is not accessible after the file is closed. */ void Close(); protected: /*! Opens the file. * @return True if successful. */ bool Open(); /*! Reads the header of the file using memory mapping. * @param pData The memory mapped data pointer. * @return True if successful. */ bool ReadHeader(char *pData); /*! Reads the fixed part of the header. * @param pData The memory mapped data pointer. * @return True if successful. */ bool ReadFixedHeader(char* pData ); /*! Reads the name/value parameters from the header. * @param pData The memory mapped data pointer. * @return True if successful. */ bool ReadTagHeader(char *pData); /*! Reads the optimum corners from the header. * @param pData The memory mapped data pointer. * @return True if successful. */ bool ReadOptimumCornerHeader(char* pData); /*! A pointer to the memory mapped file. */ void *m_lpFileMap; /*! 
A char-typed pointer to the memory mapped file (presumably the data view used for parsing - confirm in Open()). */ char *m_lpData; #ifdef _MSC_VER /*! A file handle. */ HANDLE m_hFileMap; /*! Windows handle to the file. */ HANDLE m_hFile; #else /*! File pointer for memory mapping. */ FILE *m_fp; /*! The size of the file. */ int m_MapLen; #endif /*! Flag indicating if the file is open. */ bool m_bFileOpen; /*! Flag indicating if the file is memory mapped. */ bool m_bFileMapped; /*! A string to hold the error message upon read failures. */ std::string m_strError; /*! The file name. */ std::string m_FileName; /*! The file header. */ CGRDFileHeaderData m_HeaderData; }; }; ////////////////////////////////////////////////////////////////////// #endif // _GRDFileData_HEADER_ affxparser/src/fusion/file/GRDFileWriter.cpp0000644000175200017520000001143314516003651022066 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/GRDFileWriter.h" // #include "file/FileIO.h" #include "file/FileWriter.h" // #include #include #include #include #include #include #include #include #include // using namespace affxgrd; #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. 
#endif #define IDENTIFIER_SIZE 8 #define GRD_FILE_IDENTIFIER "\211GRD\r\n\032\n" const int GRD_FILE_IDENTIFIER_LEN = 8; #define FILE_VERSION 1.0f ////////////////////////////////////////////////////////////////////// bool CGRDFileWriter::WriteHeader() { if (WriteFixedHeader() == false) return false; if (WriteTagHeader() == false) return false; if (WriteOptimumCorners() == false) return false; return true; } ////////////////////////////////////////////////////////////////////// bool CGRDFileWriter::WriteFixedHeader() { // Write the header. m_outstr.write(GRD_FILE_IDENTIFIER, GRD_FILE_IDENTIFIER_LEN); WriteFloat_N(m_outstr, m_HeaderData.GetVersion()); // Write the dimensions of the array WriteUInt32_N(m_outstr, m_HeaderData.GetRows()); WriteUInt32_N(m_outstr, m_HeaderData.GetCols()); // Write the other members. WriteFloat_N(m_outstr, m_HeaderData.GetFeaturePitchX()); WriteFloat_N(m_outstr, m_HeaderData.GetFeaturePitchY()); WriteFloat_N(m_outstr, m_HeaderData.GetFeatureSetbackX()); WriteFloat_N(m_outstr, m_HeaderData.GetFeatureSetbackY()); // Check the status. if (m_outstr.fail()) return false; return true; } ////////////////////////////////////////////////////////////////////// bool CGRDFileWriter::WriteTagHeader() { // write the number of bytes in the section - placeholder std::streampos posStart = m_outstr.tellp(); WriteUInt32_N(m_outstr, 0L); StrStrMap& props = m_HeaderData.GetParameters(); // write the number of name-value pair strings. WriteUInt32_N(m_outstr, props.size()); for( StrStrMap::iterator ii = props.begin(); ii != props.end(); ++ii ) { WriteString_N(m_outstr, ii->first); WriteString_N(m_outstr, ii->second); } // go back and write the number of bytes std::streampos posCurr = m_outstr.tellp(); m_outstr.seekp(posStart); WriteUInt32_N(m_outstr, posCurr - posStart); m_outstr.seekp(posCurr); // Check the status. 
if (m_outstr.fail()) return false; return true; } ////////////////////////////////////////////////////////////////////// bool CGRDFileWriter::WriteOptimumCorners() { const uint32_t nSubgridCoordSize = FLOAT_SIZE*8; // write the number of bytes in the section WriteUInt32_N(m_outstr, nSubgridCoordSize*m_HeaderData.GetNumSubgrids()+ULONG_SIZE*2); // write the number of subgrids. WriteUInt32_N(m_outstr, m_HeaderData.GetNumSubgrids()); // write subgrids for (uint32_t i = 0; i < m_HeaderData.GetNumSubgrids(); ++i ) { FRECT r = m_HeaderData.GetOptSubgrid(i); WriteFloat_N(m_outstr, r.ul.fx); WriteFloat_N(m_outstr, r.ul.fy); WriteFloat_N(m_outstr, r.ur.fx); WriteFloat_N(m_outstr, r.ur.fy); WriteFloat_N(m_outstr, r.ll.fx); WriteFloat_N(m_outstr, r.ll.fy); WriteFloat_N(m_outstr, r.lr.fx); WriteFloat_N(m_outstr, r.lr.fy); } // Check the status. if (m_outstr.fail()) return false; return true; } ////////////////////////////////////////////////////////////////////// bool CGRDFileWriter::CreateNewFile() { // Create a new file. m_outstr.open(m_FileName.c_str(), std::ios::out | std::ios::binary); if (m_outstr) { return WriteHeader(); } return false; } ////////////////////////////////////////////////////////////////////// bool CGRDFileWriter::WriteCenter(const FCOORD& center) { // Write the center data. WriteFloat_N(m_outstr, center.fx); WriteFloat_N(m_outstr, center.fy); // Check the status. if (m_outstr.fail()) return false; return true; } ////////////////////////////////////////////////////////////////////// bool CGRDFileWriter::CloseNewFile() { // Close the file. m_outstr.close(); // Check the status. if (m_outstr.fail()) return false; return true; } affxparser/src/fusion/file/GRDFileWriter.h0000644000175200017520000000474114516003651021537 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GRDFILEWRITER_H_ #define _GRDFILEWRITER_H_ /*! \file GRDFileWriter.h This file provides GRD file writing capabilities. */ #include "file/GRDFileData.h" // #include // namespace affxgrd { /*! This class provides methods to write a GRD file. */ class CGRDFileWriter : public CGRDFileData { public: /*! Creates the file set in SetFileName and writes the header. * @return Returns true if the file was successfully created and the header was successfully written. */ bool CreateNewFile(); /*! Writes a center coordinate to an newly created GRD file. May only be called after a successful call to CreateNewFile. * @return Returns true if the center coordinate was successfully written. */ bool WriteCenter(const FCOORD& center); /*! Closes the newly created file. * @return Returns true if the new file was successfully closed. */ bool CloseNewFile(); protected: /*! Writes the header to the open file. * @return Returns true if the header was sucessfully written. */ bool WriteHeader(); /*! Writes the fixed section of the file header to the open file. * @return Returns true if the fixed header was successfully written. */ bool WriteFixedHeader(); /*! Writes the tag header of the file header to the open file. 
* @return Returns true if the tag header was successfully written. */ bool WriteTagHeader(); /*! Writes the optimized subgrid coordinates header of the file header to the open file. * @return Returns true if the optimized subgrid coordinates header was successfully written. */ bool WriteOptimumCorners(); protected: /*! stream for writing files */ std::ofstream m_outstr; }; } #endif affxparser/src/fusion/file/GridControlData.h0000644000175200017520000001105614516003651022143 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GridControlData_HEADER_ #define _GridControlData_HEADER_ /*! \file GridControlData.h This file provides interfaces to store information in a GRC (grid control) file. */ #include #include #include // namespace affymetrix_grid_control { /*! This structure provides a feature coordinate definition. */ typedef struct _FeatureCoordinate { /*! The X cooridnate. */ int x; /*! The Y cooridnate. */ int y; } FeatureCoordinate; /*! This class provides interfaces to store GRC file information. */ class GridControlData { public: /*! Constructor */ GridControlData() {}; /*! Destructor */ ~GridControlData() { Clear(); }; protected: /*! 
An array of coordinates for the B1 probes. */ std::vector probesB1; /*! An array of coordinates for the B2 probes. */ std::vector probesB2; /*! An array of coordinates for the non-synthesized features. */ std::vector probesNS; /*! The number of columns of features on the array */ int arrayColumns; /*! The number of rows of features on the array */ int arrayRows; public: /*! Gets the number of rows. * @return The number of rows. */ int GetRows() const { return arrayRows; } /*! Sets the number of rows. * @param rows The number of rows. */ void SetRows(int rows) { arrayRows = rows; } /*! Gets the number of columns. * @return The number of columns. */ int GetColumns() const { return arrayColumns; } /*! Sets the number of columns. * @param columns The number of columns. */ void SetColumns(int columns) { arrayColumns = columns; } /*! Resizes the B1 array. * @param size The size of the array. */ void ResizeB1(int size) { probesB1.resize(size); } /*! Resizes the B2 array. * @param size The size of the array. */ void ResizeB2(int size) { probesB2.resize(size); } /*! Resizes the NS array. * @param size The size of the array. */ void ResizeNS(int size) { probesNS.resize(size); } /*! Sets the B1 probe at the given index. * @param index The index of the array. * @param coord The new coordinate. */ void SetB1(int index, FeatureCoordinate coord) { probesB1[index] = coord; } /*! Gets the B1 probe at the given index. * @param index The index of the array. * @return coord The coordinate. */ FeatureCoordinate GetB1(int index) const { return probesB1[index]; } /*! Gets the number of B1 probes. * @return The number of B1 probes. */ int GetNumB1Probes() const { return (int) probesB1.size(); } /*! Sets the B2 probe at the given index. * @param index The index of the array. * @param coord The new coordinate. */ void SetB2(int index, FeatureCoordinate coord) { probesB2[index] = coord; } /*! Gets the B2 probe at the given index. * @param index The index of the array. 
* @return coord The coordinate. */ FeatureCoordinate GetB2(int index) const { return probesB2[index]; } /*! Gets the number of B2 probes. * @return The number of B2 probes. */ int GetNumB2Probes() const { return (int) probesB2.size(); } /*! Sets the NS probe at the given index. * @param index The index of the array. * @param coord The new coordinate. */ void SetNS(int index, FeatureCoordinate coord) { probesNS[index] = coord; } /*! Gets the NS probe at the given index. * @param index The index of the array. * @return coord The coordinate. */ FeatureCoordinate GetNS(int index) const { return probesNS[index]; } /*! Gets the number of NS probes. * @return The number of NS probes. */ int GetNumNSProbes() const { return (int) probesNS.size(); } /*! Clears the member objects. */ void Clear() { probesB1.clear(); probesB2.clear(); probesNS.clear(); arrayColumns=0; arrayRows=0; }; }; }; #endif // _GridControlData_HEADER_ affxparser/src/fusion/file/GridCoordinates.h0000644000175200017520000000332614516003651022204 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _GridCoordinates_HEADER_ #define _GridCoordinates_HEADER_ /*! 
\file GridCoordinates.h Defines structures to hold the coordinates of a grid.
 */

#include "portability/affy-base-types.h"

//////////////////////////////////////////////////////////////////////

/*! A structure to hold a grid coordinate point. Both members are 32-bit signed integers. */
typedef struct _CoordinatePoint
{
	/*! The X coordinate */
	int32_t x;

	/*! The Y coordinate */
	int32_t y;

} CoordinatePoint;

/*! A structure to hold the grid coordinates, one point per corner.
 * The members are declared in the order upper left, upper right,
 * lower right, lower left.
 */
typedef struct _GridCoordinatesType
{
	/*! The upper left corner. */
	CoordinatePoint upperleft;

	/*! The upper right corner. */
	CoordinatePoint upperright;

	/*! The lower right corner. */
	CoordinatePoint lowerright;

	/*! The lower left corner. */
	CoordinatePoint lowerleft;

} GridCoordinatesType;

//////////////////////////////////////////////////////////////////////

#endif
affxparser/src/fusion/file/IniFile.cpp0000644000175200017520000001633114516003651020776 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/IniFile.h" // using namespace std; // Default constructor CIniFile::CIniFile(void) { } // Destructor CIniFile::~CIniFile(void) { } // A function to trim whitespace from both sides of a given string void CIniFile::Trim(std::string& str, const std::string & ChrsToTrim, int TrimDir) { size_t startIndex = str.find_first_not_of(ChrsToTrim); if (startIndex == std::string::npos){str.erase(); return;} if (TrimDir < 2) str = str.substr(startIndex, str.size()-startIndex); if (TrimDir!=1) str = str.substr(0, str.find_last_not_of(ChrsToTrim) + 1); } // Load the contents of the INI file bool CIniFile::Load(std::string FileName, std::vector& content) { string s; // Holds the current line from the ini file string CurrentSection; // Holds the current section name ifstream inFile (FileName.c_str()); // Create an input filestream if (!inFile.is_open()) return false; // If the input file doesn't open, then return content.clear(); // Clear the content vector string comments = ""; // A string to store comments in while(!std::getline(inFile, s).eof()) // Read until the end of the file { Trim(s); // Trim whitespace from the ends if(!s.empty()) // Make sure its not a blank line { Record r; // Define a new record if((s[0]=='#')||(s[0]==';')) // Is this a commented line? 
{ if ((s.find('[')==string::npos)&& // If there is no [ or = (s.find('=')==string::npos)) // Then it's a comment { comments += s + '\n'; // Add the comment to the current comments string } else { r.Commented = s[0]; // Save the comment character s.erase(s.begin()); // Remove the comment for further processing Trim(s); } // Remove any more whitespace } else r.Commented = ' '; // else mark it as not being a comment if(s.find('[')!=string::npos) // Is this line a section? { s.erase(s.begin()); // Erase the leading bracket s.erase(s.find(']')); // Erase the trailing bracket r.Comments = comments; // Add the comments string (if any) comments = ""; // Clear the comments for re-use r.Section = s; // Set the Section value r.Key = ""; // Set the Key value r.Value = ""; // Set the Value value CurrentSection = s; } if(s.find('=')!=string::npos) // Is this line a Key/Value? { r.Comments = comments; // Add the comments string (if any) comments = ""; // Clear the comments for re-use r.Section = CurrentSection; // Set the section to the current Section r.Key = s.substr(0,s.find('=')); // Set the Key value to everything before the = sign r.Value = s.substr(s.find('=')+1); // Set the Value to everything after the = sign } if(comments == "") // Don't add a record yet if its a comment line content.push_back(r); // Add the record to content } } inFile.close(); // Close the file return true; } // Returns all sections names of a given file std::vector CIniFile::GetSectionNames(std::string FileName) { vector data; // Holds the return data vector content; // Holds the current record // Holds the current record if (Load(FileName, content)) // Make sure the file is loaded { for (int i=0;i<(int)content.size();i++) // Loop through the content { if(content[i].Key =="") // If there is no key value, then its a section data.push_back(content[i].Section); // Add the section to the return data } } return data; // Return the data } // Returns all key contents of a specific section std::vector 
CIniFile::GetSection(std::string SectionName, std::string FileName) { vector data; // Holds the return data vector content; // Holds the current record if (Load(FileName, content)) // Make sure the file is loaded { for (int i=0;i<(int)content.size();i++) // Loop through the content { if((content[i].Section == SectionName) && // If this is the section name we want (content[i].Key != "")) // but not the section name itself data.push_back(content[i]); // Add the record to the return data } } return data; // Return the data } // Returns the contents of a specific key within a given section std::vector CIniFile::GetRecord(std::string KeyName, std::string SectionName, std::string FileName) { vector data; // Holds the return data vector content; // Holds the current record if (Load(FileName, content)) // Make sure the file is loaded { vector::iterator iter = std::find_if(content.begin(), content.end(), CIniFile::RecordSectionKeyIs(SectionName,KeyName)); // Locate the Record if (iter == content.end()) return data; // The Record was not found data.push_back (*iter); // The Record was found } return data; // Return the Record } // Returns the value of a specific key within a given section std::string CIniFile::GetValue(std::string KeyName, std::string SectionName, std::string FileName) { vector content = GetRecord(KeyName,SectionName, FileName); // Get the Record if(!content.empty()) // Make sure there is a value to return return content[0].Value; // And return the value return ""; // No value was found } // Returns all key contents of all sections std::vector CIniFile::GetSections(std::string FileName) { vector data; // Holds the return data vector content; // Holds the current record // Holds the current record if (Load(FileName, content)) // Make sure the file is loaded { for (int i=0;i<(int)content.size();i++) // Loop through the content { if(content[i].Key == "") // If this is a section data.push_back(content[i]); // Add the record to the return data } } return data; // 
Return the data } // Returns all key contents of the file. std::vector CIniFile::GetAllRecords(std::string FileName) { vector content; // Collection for the Records Load(FileName, content); // Read the file. return content; } affxparser/src/fusion/file/IniFile.h0000644000175200017520000001136714516003651020447 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file IniFile.h This file provides reading capaibilities for INI files. */ #ifndef _IniFileData_h_ #define _IniFileData_h_ ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER #pragma warning(disable: 4786) // identifier was truncated in the debug information #endif #include #include #include #include #include #include #include // /*! This class provides storages and parsing capabilities of INI files. */ class CIniFile { public: /*! Structure for storing INI information */ struct Record { std::string Comments; char Commented; std::string Section; std::string Key; std::string Value; }; /*! Enumerant for identifying characters used for comments */ enum CommentChar { Pound = '#', SemiColon = ';' }; /*! Constructor */ CIniFile(void); /*! 
Destructor */ virtual ~CIniFile(void); /*! Returns the contents of a specific key within a given section * @param KeyName Key name of interest * @param SectionName Section name of interest * @param FileName Name of file * @return Record structure that contains the contents of specific section and key */ std::vector GetRecord(std::string KeyName, std::string SectionName, std::string FileName); /*! Returns all key contents of a specific section * @param SectionName Section name of interest * @param FileName Name of file * @return Vector of record structures that contain the contents of all keys in a section */ std::vector GetSection(std::string SectionName, std::string FileName); /*! Returns all sections names of a given file * @param FileName Name of file * @return Vector of section names */ std::vector GetSectionNames(std::string FileName); /*! Returns the value of a specific key within a given section * @param KeyName Key name of interest * @param SectionName Section name of interest * @param FileName Name of file * @return String that contains the value associated with a specific key wihtin a given section */ std::string GetValue(std::string KeyName, std::string SectionName, std::string FileName); /*! Returns all key contents of the file. * @param FileName Name of file. * @return Vector of all record structures that contain the contents of all keys in all sections of the file. */ std::vector GetAllRecords(std::string FileName); private: /*! Returns all key contents of all sections * @param FileName Name of file * @return Vector of record structures that contain the key contents all sections */ std::vector GetSections(std::string FileName); /*! Load the contents of the INI file * @param FileName Name of file * @param content Vector of record structures that contain all keys within all sections * @return True if successful */ bool Load(std::string FileName, std::vector& content); /*! 
Function to trim whitespace from both sides of a given string * @param str String to perform the whitespace trimming on * @param ChrsToTrim String of white space characters to search when trimming input string * @param TrimDir Direction to perform the whitespace trimming on (left, right, both) */ void Trim(std::string& str, const std::string & ChrsToTrim = " \t\n\r", int TrimDir = 0); /*! Structure for determining a match of a given section and key */ struct RecordSectionKeyIs : std::unary_function { /*! The section name */ std::string section_; /*! The section name */ std::string key_; /*! Constructor */ RecordSectionKeyIs(const std::string& section, const std::string& key): section_(section),key_(key){} /*! Operator for checking a section name and key name match * @param rec The record object to perform match against * @return True if successful */ bool operator()( const Record& rec ) const { return ((rec.Section == section_)&&(rec.Key == key_)); } }; }; #endif // _IniFileData_h_ affxparser/src/fusion/file/IntervalEntry.h0000644000175200017520000000362514516003651021734 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! 
\file IntervalEntry.h This file defines types to store a sequence interval. */ #ifndef _IntervalEntry_HEADER_ #define _IntervalEntry_HEADER_ #include #include #include // /*! A type to define a sequence interval. */ typedef struct _IntervalEntry { /*! The sequence name. */ std::string seq; /*! The probe set name. */ std::string probeSetName; /*! The start position. */ int start; /*! The stop position. */ int stop; /*! The overlap score. */ float overlap; /*! The strand, either "+" or "-". */ char strand; /*! The size of the region. Assumes 0 based index for start and 1 based for stop. * @return The size of the region. */ int size() const { return stop - start; } } IntervalEntry; /*! An STL list of interval entries. */ typedef std::list IntervalEntryList; /*! An STL list iterator of interval entries. */ typedef std::list::iterator IntervalEntryListIt; /*! An STL list iterator of interval entries. */ typedef std::list::const_iterator IntervalEntryListConstIt; #endif affxparser/src/fusion/file/MDLFileData.cpp0000644000175200017520000003670414516003651021473 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "file/MDLFileData.h" // #include "file/FileIO.h" #include "file/FileWriter.h" // #include #include #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif using namespace std; using namespace affxmdl; ////////////////////////////////////////////////////////////////////// static void SwapToLittleEndian(MDLData &data) { // @todo: This breaks type punning. // This is a double (64bit) // The high and low words need to be flipped. float *f = (float *)&data.Affinity; f[0] = MmGetFloat_I(&f[0]); f[1] = MmGetFloat_I(&f[1]); #if BYTE_ORDER == BIG_ENDIAN float temp = f[0]; f[0] = f[1]; f[1] = temp; #endif data.RelativeBkg = MmGetFloat_I(&data.RelativeBkg); data.Saturation = MmGetFloat_I(&data.Saturation); data.Offset = MmGetFloat_I(&data.Offset); } ////////////////////////////////////////////////////////////////////// static void SwapFromLittleEndian(MDLData &data) { SwapToLittleEndian(data); } ////////////////////////////////////////////////////////////////////// static void SwapToLittleEndian(MDLHeader &header) { header.nVersion = MmGetInt32_I(&header.nVersion); header.nNumProbes = MmGetInt32_I(&header.nNumProbes); header.nAnalysisType = MmGetInt32_I(&header.nAnalysisType); header.bMasked = MmGetInt32_I(&header.bMasked); } ////////////////////////////////////////////////////////////////////// static void SwapFromLittleEndian(MDLHeader &header) { SwapToLittleEndian(header); } ////////////////////////////////////////////////////////////////////// CMDLFileHeader::CMDLFileHeader(int32_t nProbes, int32_t analysisType) : m_nVersion(PLIER_MODEL_FILE_VERSION), m_nNumProbes(nProbes), 
m_nAnalysisType(MDL_PLIER), m_bMasked(false) { } ///////////////////////////////////////////////////////////////// CMDLFileHeader &CMDLFileHeader::operator=( CMDLFileHeader &rhs ) { m_nVersion = rhs.GetVersion(); m_nNumProbes = rhs.GetNumProbes(); m_nAnalysisType = rhs.GetAnalysisType(); m_bMasked = rhs.GetMasked(); m_strChipType = rhs.GetChipType(); m_strModifiedDate = rhs.GetModifiedDate(); m_strReserve = rhs.GetReserve(); return *this; } ///////////////////////////////////////////////////////////////// CMDLProbeData::CMDLProbeData() : m_dAffinity(0.0f), m_fRelativeBkg(0.0f), m_fSaturation(0.0f), m_fOffset(0.0f), m_nIndex(0), m_bMapped(false), m_lpData(NULL) { } ///////////////////////////////////////////////////////////////// void CMDLProbeData::MakeShallowCopy(CMDLProbeData &orig) { m_dAffinity = orig.m_dAffinity; m_fRelativeBkg = orig.m_fRelativeBkg; m_fSaturation = orig.m_fSaturation; m_fOffset = orig.m_fOffset; m_bMapped = orig.m_bMapped; m_lpData = orig.m_lpData; } ///////////////////////////////////////////////////////////////// CMDLFileData::CMDLFileData ( int32_t nProbes, double defAffinity, int32_t analysisType ) : m_Header(nProbes, analysisType) { // Allocate probe data if (nProbes > 0) { m_ProbeData.resize(nProbes); CMDLProbeData probeData; probeData.SetAffinity(defAffinity); for (int32_t i=0; i 0) { m_ProbeData.resize(nProbes); CMDLProbeData probeData; probeData.SetAffinity(defAffinity); for (int32_t i=0; inVersion); m_Header.SetNumProbes(pHeader->nNumProbes); m_Header.SetChipType(pHeader->sChipType); m_Header.SetAnalysisType(pHeader->nAnalysisType); m_Header.SetModifiedDate(pHeader->sModifiedDate); m_Header.SetMasked((pHeader->bMasked == 1 ? 
true : false)); } ///////////////////////////////////////////////////////////////// void CMDLFileData::GetHeaderInfo(MDLHeader* pHeader) { pHeader->nVersion = m_Header.GetVersion(); pHeader->nNumProbes = m_Header.GetNumProbes(); strcpy(pHeader->sChipType, m_Header.GetChipType().c_str()); pHeader->nAnalysisType = m_Header.GetAnalysisType(); strcpy(pHeader->sModifiedDate, m_Header.GetModifiedDate().c_str()); pHeader->bMasked = m_Header.GetMasked(); memset(pHeader->sReserve, 0, RESERVED_SIZE); } ///////////////////////////////////////////////////////////////// void CMDLFileData::SetDataInfo(MDLData* pData, CMDLProbeData *data) { data->SetAffinity(pData->Affinity); data->SetOffset(pData->Offset); data->SetRelativeBkg(pData->RelativeBkg); data->SetSaturation(pData->Saturation); } ///////////////////////////////////////////////////////////////// void CMDLFileData::GetDataInfo(MDLData* pData, CMDLProbeData &data) { pData->Affinity = data.GetAffinity(); pData->Offset = data.GetOffset(); pData->RelativeBkg = data.GetRelativeBkg(); pData->Saturation = data.GetSaturation(); } ///////////////////////////////////////////////////////////////// void CMDLFileData::GetProbeData(int32_t index, CMDLProbeData& data) { if (index < 0 || index >= m_Header.GetNumProbes()) assert(index >= 0 && index < m_Header.GetNumProbes()); #if defined(_USE_MEM_MAPPING_) && defined(_MSC_VER) if (m_bFileMapped == false) data.MakeShallowCopy(m_ProbeData[index]); else { int32_t offset = index*MDLDATASIZE; char *lpData = m_lpData + m_nHeaderOffset + offset; data.m_nIndex = index; data.m_bMapped = true; data.m_lpData = lpData; data.m_dAffinity = *(double *)(lpData); lpData += (2*FLOAT_SIZE); data.m_fRelativeBkg = *(float *)(lpData); lpData += FLOAT_SIZE; data.m_fSaturation = *(float *)(lpData); lpData += FLOAT_SIZE; data.m_fOffset = *(float *)(lpData); } #else data.MakeShallowCopy(m_ProbeData[index]); #endif } ///////////////////////////////////////////////////////////////// void 
CMDLFileData::SetProbeData(int32_t index, CMDLProbeData data) { if (index < 0 || index >= m_Header.GetNumProbes() || m_ProbeData.size() <= 0) assert(index >= 0 && index < m_Header.GetNumProbes() && m_ProbeData.size() > 0); m_ProbeData[index] = data; } ///////////////////////////////////////////////////////////////// bool CMDLFileData::Read() { // Check if file exists if (Exists() == false) return false; // Open the file if (Open() == false) { Close(); return false; } return true; } ///////////////////////////////////////////////////////////////// bool CMDLFileData::ReadHeader() { // Check if file exists if (Exists() == false) return false; // Open the file if (Open(true) == false) { Close(); return false; } return true; } ///////////////////////////////////////////////////////////////// bool CMDLFileData::Open(bool bReadHeaderOnly) { // First close the file. Close(); #if defined(_USE_MEM_MAPPING_) && defined(_MSC_VER) return ReadFileUsingMemMap(bReadHeaderOnly); #else return ReadFile(bReadHeaderOnly); #endif } ///////////////////////////////////////////////////////////////// void CMDLFileData::Close() { m_ProbeData.clear(); #if defined(_USE_MEM_MAPPING_) && defined(_MSC_VER) if (m_bFileOpen) { if (m_bFileMapped) { UnmapViewOfFile(m_lpFileMap); m_lpFileMap = NULL; CloseHandle(m_hFileMap); m_hFileMap = NULL; m_lpData = NULL; } CloseHandle(m_hFile); m_hFile = INVALID_HANDLE_VALUE; m_bFileOpen = false; } #endif } ///////////////////////////////////////////////////////////////// bool CMDLFileData::ReadFile(bool bReadHeaderOnly) { try { // Open the file. ifstream instr; instr.open(m_strFileName.c_str(), ios::in | ios::binary); // Check if open if (!instr) { m_strError = "Unable to open the MDL file for reading."; return false; } // Read the header. if (ReadFileHeader(instr) == false) return false; // Stop if just reading the header. if (bReadHeaderOnly) return true; // Read Data if (m_Header.GetNumProbes() > 0) { // Allocate for the probe data. 
CMDLProbeData *pProbeData; m_ProbeData.resize(m_Header.GetNumProbes()); // Loop over number of probes char strData[MDLDATASIZE]; MDLData data; for (int32_t i=0; i #endif ////////////////////////////////////////////////////////////////////// #include "portability/affy-base-types.h" // #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxmdl { /*! Model version */ #define PLIER_MODEL_FILE_VERSION 1 /*! Reserved size */ #define RESERVED_SIZE 1020 /*! Analysis type */ enum MDLAnalysisType { /*! PLIER analysis */ MDL_PLIER=0 }; ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER /*! Use one byte alignment for memory mapping functionality */ #pragma pack(push, 1) #endif /*! PLIER model file header structure */ typedef struct _MDLHeader { int32_t nVersion; /*! Model file version number */ int32_t nNumProbes; /*! Number pf probes */ int32_t nAnalysisType; /*! PLIER analysis type */ char sChipType[64]; /*! Probe array type */ char sModifiedDate[36]; /*!Last modified date */ int32_t bMasked; /*! Model file generated with masking */ char sReserve[RESERVED_SIZE]; /*! Reserved for future use */ } MDLHeader; #ifdef _MSC_VER /*! End of one byte alignment */ #pragma pack(pop) #endif /*! This is the size of the object */ #define MDLHEADERSIZE sizeof(MDLHeader) /*! PLIER model file header class */ class CMDLFileHeader { protected: int32_t m_nVersion; /*! Model file version number */ int32_t m_nNumProbes; /*! Number pf probes */ int32_t m_nAnalysisType; /*! PLIER analysis type */ bool m_bMasked; /*! Model file generated with masking */ std::string m_strChipType; /*! Probe array type */ std::string m_strModifiedDate; /*! Last modified date */ std::string m_strReserve; /*! Reserved for future use */ friend class CMDLFileData; /*! Friend to the parent class */ public: /*! 
Operator for Header assignment * @param rhs The header object to copy * @return The copied header */ CMDLFileHeader &operator=(CMDLFileHeader &rhs); /*! The version number * @return The version number. */ int32_t GetVersion() const { return m_nVersion; } /*! The number of probes * @return The number of probes */ int32_t GetNumProbes() const { return m_nNumProbes; } /*! The analysis type * @return The analysis type */ int32_t GetAnalysisType() const { return m_nAnalysisType; } /*! The mask flag * @return The mask flag */ bool GetMasked() const { return m_bMasked; } /*! The chip type * @return The chip type */ std::string GetChipType() const { return m_strChipType; } /*! The modified date * @return The modified date */ std::string GetModifiedDate() const { return m_strModifiedDate; } /*! Reserved */ std::string GetReserve() const { return m_strReserve; } /*! Sets the version number * @param value The version number */ void SetVersion(int32_t value) { m_nVersion = value; } /*! Sets the number of probes * @param value The number of probes */ void SetNumProbes(int32_t value) { m_nNumProbes = value; } /*! Sets the analysis type * @param value The analysis type */ void SetAnalysisType(int32_t value) { m_nAnalysisType = value; } /*! Sets the mask flag * @param value The mask flag */ void SetMasked(bool value) { m_bMasked = value; } /*! Sets the chip type * @param value The chip type */ void SetChipType(const char *value) { m_strChipType = value; } /*! Sets the modification date * @param value The date */ void SetModifiedDate(const char *value) { m_strModifiedDate = value; } /*! Constructor * @param nProbes The number of probes * @param analysisType The type of analysis */ CMDLFileHeader(int32_t nProbes=0, int32_t analysisType=MDL_PLIER); }; ////////////////////////////////////////////////////////////////////// #ifdef _MSC_VER /*! Use one byte alignment for memory mapping functionality */ #pragma pack(push, 1) #endif /*! 
PLIER model data structure */ typedef struct _MDLData { double Affinity; /*! Affinity value */ float RelativeBkg; /*! Reserved for future use */ float Saturation; /*! Reserved for future use */ float Offset; /*! Reserved for future use */ } MDLData; #ifdef _MSC_VER /*! End of one byte alignment */ #pragma pack(pop) #endif /*! This is the size of the object */ #define MDLDATASIZE sizeof(MDLData) /*! PLIER model probe data class */ class CMDLProbeData { protected: double m_dAffinity; /*! Probe affinity value */ float m_fRelativeBkg; /*! Reserved for future use */ float m_fSaturation; /*! Reserved for future use */ float m_fOffset; /*! Reserved for future use */ int32_t m_nIndex; /*! An index for the object */ bool m_bMapped; /*! Flag indicating if memory mapped */ char *m_lpData; /*! Memory map pointer */ friend class CMDLFileData; /*! Friend to parent object */ /*! Copies the class object, one member at a time * @param orig The object to copy */ void MakeShallowCopy(CMDLProbeData &orig); public: /*! The affinity value * @return The affinity value */ double GetAffinity() const { return m_dAffinity; } /*! The relative background * @return The relative background */ float GetRelativeBkg() const { return m_fRelativeBkg; } /*! The saturation * @return The saturation */ float GetSaturation() const { return m_fSaturation; } /*! The offset * @return The offset */ float GetOffset() const { return m_fOffset; } /*! Sets the affinity * @param value The affinity */ void SetAffinity(double value) { m_dAffinity = value; } /*! Sets the relative background * @param value The relative background */ void SetRelativeBkg(float value) { m_fRelativeBkg = value; } /*! Sets the saturation * @param value The saturation */ void SetSaturation(float value) { m_fSaturation = value; } /*! Sets the offset * @param value The offset */ void SetOffset(float value) { m_fOffset = value; } /*! Constructor */ CMDLProbeData(); }; ////////////////////////////////////////////////////////////////////// /*! 
PLIER model file stores the model data as the structure defined above. (1) It is probe array type specific. Only the model file with the same probe array type can be used for PLIER analysis (2) It stores the model data for each probe (or cells). (3) It starts storing model data for all PM probes and follows by all MM probes. (4) Sum(a(j)) = # of probes */ class CMDLFileData { protected: /*! The file header */ CMDLFileHeader m_Header; /*! The file name */ std::string m_strFileName; /*! Stores an error message */ std::string m_strError; /*! The probe data */ std::vector m_ProbeData; /*! Gets the header * @param pHeader The header data */ void GetHeaderInfo(MDLHeader* pHeader); /*! Sets the header * @param pHeader The header data */ void SetHeaderInfo(MDLHeader* pHeader); /*! Sets the data in the class * @param pData The data * @param data The probe data */ void SetDataInfo(MDLData* pData, CMDLProbeData *data); /*! Gets the data from the class * @param pData The data * @param data The probe data */ void GetDataInfo(MDLData* pData, CMDLProbeData &data); /*! Opens the file * @param bReadHeaderOnly Flag to read only the header * @return True if successful */ bool Open(bool bReadHeaderOnly=false); /*! Reads the file * @param bReadHeaderOnly Flag to read only the header * @return True if successful */ bool ReadFile(bool bReadHeaderOnly=false); /*! Reads the file header * @param instr The file stream. * @return True if successful */ bool ReadFileHeader(std::ifstream &instr); #if defined(_USE_MEM_MAPPING_) && defined(_MSC_VER) bool ReadFileUsingMemMap(bool bReadHeaderOnly); void *m_lpFileMap; char *m_lpData; HANDLE m_hFileMap; HANDLE m_hFile; bool m_bFileOpen; bool m_bFileMapped; int32_t m_nHeaderOffset; #endif public: /*! Sets the file name * @param value The file name */ void SetFileName(const char *value) { m_strFileName = value; } /*! Returns the file name * @return The file name */ std::string GetFileName() const { return m_strFileName; } /*! 
The error message * @return The error message */ std::string GetError() const { return m_strError; } /*! Returns the header * @return The file header object */ CMDLFileHeader &GetHeader() { return m_Header; } /*! Sets the header * @param header The header to save */ void SetHeader(CMDLFileHeader& header) { m_Header = header; } /*! Gets the probe data * @param index Index to the probe data object * @param data The probe data */ void GetProbeData(int32_t index, CMDLProbeData& data); /*! Sets the probe data * @param index Index to the probe data object * @param data The probe data */ void SetProbeData(int32_t index, CMDLProbeData data); /*! Reads the entire file * @return True if successful */ bool Read(); /*! Reads the file header only * @return True if successful */ bool ReadHeader(); /*! Writes the file * @return True if successful */ bool Write(); /*! Checks if the file exists * @return True if the file exists */ bool Exists(); /*! Closes the file */ void Close(); /*! Initializes the object for writing * @param nSize The size * @param defAffinity The default affynity value * @param analysisType The analysis type */ void InitializeForWriting(int32_t nSize=0, double defAffinity=1.0, int32_t analysisType=MDL_PLIER); /*! Outputs the file context for debugging * @param sFile The output file name * @param nProbes The number of probes * @return True if successful */ bool PrintMDLToTextFile(const char* sFile, int32_t nProbes); /*! Constructor */ CMDLFileData(int32_t nSize=0, double defAffinity=1.0, int32_t analysisType=MDL_PLIER); /*! Destructor */ ~CMDLFileData(); }; //////////////////////////////////////////////////////////////////// } // namespace #endif // _MdlFileData_h_ affxparser/src/fusion/file/MSKFileData.cpp0000644000175200017520000001224714516003651021505 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "file/MSKFileData.h" // #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif ////////////////////////////////////////////////////////////////////// using namespace affxmsk; using namespace std; ////////////////////////////////////////////////////////////////////// /*! The call section in the mask file contains probes to ignore from ana analysis */ #define CALL_SECTION "[Call]" /*! The comp section in the mask file contains probe sets to use for scaling or normalization */ #define COMP_SECTION "[Comp]" ////////////////////////////////////////////////////////////////////// /* * Initialize the class to null values */ CMSKFileData::CMSKFileData() { } ////////////////////////////////////////////////////////////////////// /* * Clean up any used memory */ CMSKFileData::~CMSKFileData() { Clear(); } ////////////////////////////////////////////////////////////////////// /* * Deallocate any memory for all the class members and initialize them back to * null or zero values. 
*/ void CMSKFileData::Clear() { m_strArrayType = ""; m_Indicies.clear(); m_Sets.clear(); } ////////////////////////////////////////////////////////////////////// /* * Check for the file existance */ bool CMSKFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_strFileName.c_str(), &st) == 0); } ////////////////////////////////////////////////////////////////////// /* * Read the file contents. */ bool CMSKFileData::Read() { // Clear any existing data. Clear(); // Open the mask file. ifstream instream(m_strFileName.c_str(), ios::in); if (!instream) { Clear(); m_strError = "Unable to open the mask file."; return false; } // The first line is the array type const int LINELENGTH = 1024; char str[LINELENGTH]; instream.getline(str, LINELENGTH); m_strArrayType = str; // Search for the call or comp sections. bool bCallSectionFound = false; bool bCompSectionFound = false; while (instream.getline(str, LINELENGTH)) { // Skip blank lines. if (strlen(str) == 0) continue; // If the call section is found then continue with the while loop. // Set the flags to start processing lines as from the call section // and stop processing lines as from the comp section. if (strncmp(str, CALL_SECTION, strlen(CALL_SECTION)) == 0) { bCallSectionFound = true; bCompSectionFound = false; continue; } // If the comp section is found then continue with the while loop. // Set the flags to start processing lines as from the comp section // and stop processing lines as from the call section. if (strncmp(str, COMP_SECTION, strlen(COMP_SECTION)) == 0) { bCallSectionFound = false; bCompSectionFound = true; continue; } // Parse the call section line. if (bCallSectionFound) { // Get the probe set name. char *token = strtok(str, "\t"); ProbeSetIndiciesType indicies; indicies.probeSetName = token; // Get the mask values. char *maskValues = strtok(NULL, "\t"); // Loop over all the mask pairs (start-stop). 
char *maskEntry; while ((maskEntry = strtok(maskValues, ",")) != NULL) { // Extract the first and last index from the token. int first=0; int last=0; if (sscanf(maskEntry, "%d-%d", &first, &last) == 1) last = first; maskValues = NULL; // The indicies are one based in the mask file. Convert them to zero based // and store them in the list. for (int i=first-1; i #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxmsk { /*! A type to hold the indicies for a probe set */ typedef struct _ProbeSetIndiciesType { /*! The name of the probe set */ std::string probeSetName; /*! The list of indicies for the probe set */ std::list indicies; } ProbeSetIndiciesType; /*! An STL list of ProbeSetIndiciesType objects */ typedef std::list ProbeSetIndiciesList; /*! An STL constant iterator for the ProbeSetIndiciesList object */ typedef ProbeSetIndiciesList::const_iterator ProbeSetIndiciesListConstIt; /*! An STL list of probe set names */ typedef std::list ProbeSetList; /*! An STL constant iterator for the ProbeSetList object */ typedef ProbeSetList::const_iterator ProbeSetListConstIt; //////////////////////////////////////////////////////////////////// /*! Stores the contents of a MSK file. * * The mask file contains two sections. The first section stores a list of probe sets which * is used by the scaling and normalization functions of the MAS5 algorithm. The second section * defines the list of probe pairs to ignore in the MAS5 algorithm. */ class CMSKFileData { public: /*! Constructor */ CMSKFileData(); /*! Destructor */ ~CMSKFileData(); protected: /*! The name of the MSK file */ std::string m_strFileName; /*! An error string when a read error occurs */ std::string m_strError; /*! The array type in the mask file */ std::string m_strArrayType; /*! The list of probe set indicies */ ProbeSetIndiciesList m_Indicies; /*! The List of probe sets */ ProbeSetList m_Sets; public: /*! Sets the file name. 
* * @param name The name of the MSK file. */ void SetFileName(const char *name) { m_strFileName = name; } /*! Gets the file name. * * @return The file name. */ const char *GetFileName() const { return m_strFileName.c_str(); } /*! Gets the array type. * * @return The probe array type in the mask file. */ const char *GetArrayType() const { return m_strArrayType.c_str(); } /*! Gets the number of probe set indicies in the mask file * * @return The number of probe set indicies in the mask file */ int GetProbeSetIndiciesListCount() const { return (int) m_Indicies.size(); } /*! Gets the number of probe sets in the mask file. * * @return The number of probe sets in the mask file. */ int GetProbeSetListCount() const { return (int) m_Sets.size(); } /*! The error string if a read error occurs * * @return The error string if an error occurs */ std::string GetError() const { return m_strError; } /*! Returns the probe set indicies iterators * * @param begin The head of the list. * @param end The end of the list. */ void GetProbeSetIndiciesIterators(ProbeSetIndiciesListConstIt &begin, ProbeSetIndiciesListConstIt &end); /*! Returns the probe set iterators * * @param begin The head of the list. * @param end The end of the list. */ void GetProbeSetIterators(ProbeSetListConstIt &begin, ProbeSetListConstIt &end); /*! Reads the contents of the file. * * @return True if successful */ bool Read(); /*! Checks for the existance of a file. * * @return True if the file exists */ bool Exists(); /*! Clears memory associated with the class */ void Clear(); }; //////////////////////////////////////////////////////////////////// } // namespace ////////////////////////////////////////////////////////////////////// #endif affxparser/src/fusion/file/MSKFileWriter.cpp0000644000175200017520000000610214516003651022101 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/MSKFileWriter.h" // #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif ////////////////////////////////////////////////////////////////////// using namespace affxmsk; using namespace std; ////////////////////////////////////////////////////////////////////// /*! The call section in the mask file contains probes to ignore from ana analysis */ #define CALL_SECTION "[Call]" /*! The comp section in the mask file contains probe sets to use for scaling or normalization */ #define COMP_SECTION "[Comp]" ////////////////////////////////////////////////////////////////////// /* * Initialize the class to null values */ CMSKFileWriter::CMSKFileWriter() { } /* * Clean up any used memory */ CMSKFileWriter::~CMSKFileWriter() { Clear(); } /* * Write the file contents. The probe indicies are one based in the file. */ bool CMSKFileWriter::Save() { m_strError = ""; // Check if the array type has been set. if (m_strArrayType.length() == 0) { m_strError = "The array type has not been set."; return false; } // Open the mask file. 
ofstream outstream(m_strFileName.c_str(), ios::out); if (!outstream) { m_strError = "Unable to create the mask file."; return false; } // The first line is the array type outstream << m_strArrayType << endl; // Save the comp section. int size = (int) m_Sets.size(); if (size > 0) { outstream << endl << COMP_SECTION << endl; for (list::const_iterator it=m_Sets.begin(); it!=m_Sets.end(); ++it) { outstream << (*it) << endl; } } // Save the call section. size = (int) m_Indicies.size(); if (size > 0) { outstream << endl << CALL_SECTION << endl; for (ProbeSetIndiciesListConstIt it=m_Indicies.begin(); it!=m_Indicies.end(); ++it) { const ProbeSetIndiciesType &entry = *it; outstream << entry.probeSetName << "\t"; int index=0; size = (int) entry.indicies.size(); for (list::const_iterator inIt=entry.indicies.begin(); inIt!=entry.indicies.end(); ++inIt) { outstream << (*inIt) + 1; if (index < size-1) outstream << ","; ++index; } outstream << endl; } } return true; } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/MSKFileWriter.h0000644000175200017520000000475714516003651021564 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _MSKFileWriter_HEADER_ #define _MSKFileWriter_HEADER_ /*! \file MSKFileWriter.h This file provides writing capaibilities for MSK files. */ ////////////////////////////////////////////////////////////////////// #include "file/MSKFileData.h" // ////////////////////////////////////////////////////////////////////// namespace affxmsk { /*! Provides functions to write a MSK file. * * The mask file contains two sections. The first section stores a list of probe sets which * is used by the scaling and normalization functions of the MAS5 algorithm. The second section * defines the list of probe pairs to ignore in the MAS5 algorithm. * * The indicies are stored in the file one based. When read, they are converted to zero based. */ class CMSKFileWriter : public CMSKFileData { public: /*! Constructor */ CMSKFileWriter(); /*! Destructor */ ~CMSKFileWriter(); public: /*! Sets the array type. * @param arrayType The probe array type. */ void SetArrayType(const char *arrayType) { m_strArrayType = arrayType; } /*! Saves the data to a MSK file. * * @return True if successful */ bool Save(); /*! Adds a set of probe indicies to the list. * @param indicies The probe indicies (the indicies should be one bases here). */ void AddProbeSetIndicies(const ProbeSetIndiciesType &indicies) { m_Indicies.push_back(indicies); } /*! Adds a probe set to the list. * @param probeSet The probe set to add. 
*/ void AddProbeSet(const char *probeSet) { m_Sets.push_back(probeSet); } }; //////////////////////////////////////////////////////////////////// } // namespace ////////////////////////////////////////////////////////////////////// #endif affxparser/src/fusion/file/PSIFileData.cpp0000644000175200017520000000574714516003651021515 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "file/PSIFileData.h" // #include #include #include #include #include #include #include // #ifdef _MSC_VER #pragma warning(disable: 4996) // don't show deprecated warnings. #endif #ifdef _INCLUDE_UNISTD_HEADER_ #include #endif ////////////////////////////////////////////////////////////////////// using namespace affxpsi; using namespace std; ////////////////////////////////////////////////////////////////////// /* * Initialize the class to null values */ CPSIFileData::CPSIFileData() { } ////////////////////////////////////////////////////////////////////// /* * Clean up any used memory */ CPSIFileData::~CPSIFileData() { Clear(); } ////////////////////////////////////////////////////////////////////// /* * Deallocate any memory for all the class members. 
*/ void CPSIFileData::Clear() { m_ProbeSets.clear(); } ////////////////////////////////////////////////////////////////////// /* * Check for the file existance */ bool CPSIFileData::Exists() { // Find the file stats. struct stat st; return (stat(m_strFileName.c_str(), &st) == 0); } ////////////////////////////////////////////////////////////////////// /* * Read the file contents. */ bool CPSIFileData::Read() { // Clear any existing data. Clear(); // Open the mask file. ifstream instream(m_strFileName.c_str(), ios::in); if (!instream) { Clear(); return false; } // The first line is the number of probe sets int nSets=0; const int LINELENGTH = 128; char str[LINELENGTH]; instream.getline(str, LINELENGTH); if (sscanf(str, "#Probe Sets: %d", &nSets) == 0) { Clear(); return false; } // The remaining lines contain the probe set names. m_ProbeSets.resize(nSets); int num; int iSet=0; char name[LINELENGTH]; while (instream.getline(str, LINELENGTH)) { // Skip blank lines. if (strlen(str) == 0) continue; // Parse and store the line. sscanf(str, "%*s %s %d", name, &num); m_ProbeSets[iSet].probeSetName = name; m_ProbeSets[iSet].numberPairs = num; ++iSet; } return true; } ////////////////////////////////////////////////////////////////////// affxparser/src/fusion/file/PSIFileData.h0000644000175200017520000000620214516003651021145 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _PSIFileData_HEADER_ #define _PSIFileData_HEADER_ /*! \file PSIFileData.h This file provides reading capaibilities for PSI files. */ ////////////////////////////////////////////////////////////////////// #include #include #include #include // ////////////////////////////////////////////////////////////////////// namespace affxpsi { /*! A type to hold the probe set information file entry */ typedef struct _ProbeSetInfo { /*! The name of the probe set */ std::string probeSetName; /*! The number of probe pairs in the set */ int numberPairs; } ProbeSetInfo; /*! An STL vector of ProbeSetInfo objects */ typedef std::vector ProbeSetInfoVector; /*! Stores the contents of a PSI file. * * The PSI file the probe set names and the number of probe pairs in each set. */ class CPSIFileData { public: /*! Constructor */ CPSIFileData(); /*! Destructor */ ~CPSIFileData(); protected: /*! The name of the PSI file */ std::string m_strFileName; /*! The probe set information */ ProbeSetInfoVector m_ProbeSets; public: /*! Sets the file name. * @param name The name of the PSI file. */ void SetFileName(const char *name) { m_strFileName = name; } /*! Gets the file name. * @return The file name. */ const char *GetFileName() const { return m_strFileName.c_str(); } /*! Gets the number of probe sets. * @return The number of probe sets. */ int GetProbeSetCount() const { return (int) m_ProbeSets.size(); } /*! Gets the probe set name. * @param index The index to the probe set vector. * @return The probe set name. */ std::string GetProbeSetName(int index) const { return m_ProbeSets[index].probeSetName; } /*! Gets the number of probe pairs. * @param index The index to the probe set vector. 
* @return The number of probe pairs. */ int GetProbePairs(int index) const { return m_ProbeSets[index].numberPairs; } /*! Reads the contents of the file. * @return True if successful */ bool Read(); /*! Checks for the existance of a file. * @return True if the file exists */ bool Exists(); /*! Clears memory associated with the class */ void Clear(); }; //////////////////////////////////////////////////////////////////// } // namespace ////////////////////////////////////////////////////////////////////// #endif affxparser/src/fusion/file/SAXDttArrayHandlers.cpp0000644000175200017520000002557614516003651023261 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/SAXDttArrayHandlers.h" // #include #include #include // using namespace affymetrix_dttarray; using namespace std; XERCES_CPP_NAMESPACE_USE; /*! Converts a XML character string to string. * @param c1 The XML string to convert * @return The converted string */ static string XMLChToString(const XMLCh* const c1) { string s; int i=0; while (c1[i] != 0) { s += (char)c1[i]; ++i; } return s; } /*! Comparison operator for XML strings to wide character strings. 
* @param c1 The XML string
 * @param c2 The wide string (taken by value)
 * @return True if both strings have the same length and the same code units
 */
static bool operator==(const XMLCh* const &c1, wstring c2)
{
	// Strings of different length can never match; this also guarantees the
	// loop below never reads past the end of c1.
	if (c2.length() != XMLString::stringLen(c1))
		return false;
	// Compare the two strings one code unit at a time.
	for (int i=0; i<(int)c2.length(); i++)
	{
		if (c2[i] != c1[i])
			return false;
	}
	return true;
}

/*
 * Store the array data and set the starting element to the head.
 * The nesting counter starts at zero and the state at DTT_ARRAY_FILE.
 */
SAXArrayHandlers::SAXArrayHandlers(DttArrayData *data) :
	levelsDeep(0),
	arrayData(data),
	currentElement(DTT_ARRAY_FILE)
{
}

/*
 * Destructor. Nothing is released here; arrayData is not deleted.
 */
SAXArrayHandlers::~SAXArrayHandlers()
{
}

/*
 * No processing needed. This is only here to provide a derived function.
 */
void SAXArrayHandlers::startDocument()
{
}

/*
 * No processing needed. This is only here to provide a derived function.
 */
void SAXArrayHandlers::endDocument()
{
}

/*
 * Called at the end of each element; backs up the current element.
 */
void SAXArrayHandlers::endElement(const XMLCh* const name)
{
	MoveCurrentElementBack(name);
}

/*
 * Back up the current element.
 *
 * Moves currentElement to the parent state of the element that just ended.
 * The branches are tested in order, and the first one takes priority over
 * all name based branches, so their order must not be changed.
 * Element names that match no branch leave the state untouched.
 */
void SAXArrayHandlers::MoveCurrentElementBack(const XMLCh* const name)
{
	// A null name carries no information.
	if (name == 0)
		return;

	// While inside a name/value element with a non-zero nesting counter, the
	// closing tag only decrements the counter; the state itself is kept and
	// the name of the closing element is not examined.
	if ((currentElement == BIOSOURCE_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE || currentElement == EXPERIMENT_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE) && levelsDeep > 0)
	{
		--levelsDeep;
	}

	// BioMaterial package branch.
	else if (name == BIOMATERIAL_PACKAGE_ELEMENT)
		currentElement = DTT_ARRAY_FILE;

	else if (name == BIOMATERIAL_ASSNLIST_ELEMENT)
		currentElement = BIOMATERIAL_PACKAGE;

	else if (name == BIOSOURCE_ELEMENT)
		currentElement = BIOMATERIAL_ASSNLIST;

	// The following element names occur under more than one parent, so the
	// current state is checked as well as the name.
	else if (name == PROPERTYSETS_ASSNLIST_ELEMENT && currentElement == BIOSOURCE_PROPERTYSETS_ASSNLIST)
		currentElement = BIOSOURCE;

	else if (name == NAME_VALUE_TYPE_ELEMENT && currentElement == BIOSOURCE_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE)
		currentElement = BIOSOURCE_PROPERTYSETS_ASSNLIST;

	else if (name == CHARACTERISTICS_ASSNLIST_ELEMENT && currentElement == BIOSOURCE_CHARACTERISTICS_ASSNLIST)
		currentElement = BIOSOURCE;

	else if (name == ONTOLOGY_ENTRY_ELEMENT && currentElement == BIOSOURCE_CHARACTERISTICS_ASSNLIST_ONTOLOGY)
		currentElement = BIOSOURCE_CHARACTERISTICS_ASSNLIST;

	else if (name == MATERIAL_TYPE_ASSN_ELEMENT && currentElement == BIOSOURCE_MATERIALTYPE_ASSNLIST)
		currentElement = BIOSOURCE;

	else if (name == ONTOLOGY_ENTRY_ELEMENT && currentElement == BIOSOURCE_MATERIALTYPE_ASSNLIST_ONTOLOGY)
		currentElement = BIOSOURCE_MATERIALTYPE_ASSNLIST;

	// Experiment package branch.
	else if (name == EXPERIMENT_PACKAGE_ELEMENT)
		currentElement = DTT_ARRAY_FILE;

	else if (name == EXPERIMENT_ASSNLIST_ELEMENT)
		currentElement = EXPERIMENT_PACKAGE;

	else if (name == EXPERIMENT_ELEMENT)
		currentElement = EXPERIMENT_ASSNLIST;

	else if (name == PROPERTYSETS_ASSNLIST_ELEMENT && currentElement == EXPERIMENT_PROPERTYSETS_ASSNLIST)
		currentElement = EXPERIMENT;

	else if (name == NAME_VALUE_TYPE_ELEMENT && currentElement == EXPERIMENT_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE)
		currentElement = EXPERIMENT_PROPERTYSETS_ASSNLIST;

	// Array design package branch.
	else if (name == ARRAY_DESIGN_PACKAGE_ELEMENT)
		currentElement = DTT_ARRAY_FILE;

	else if (name == ARRAY_DESIGN_ASSNLIST_ELEMENT)
		currentElement = ARRAY_DESIGN_PACKAGE;

	else if (name == PHYSICAL_ARRAY_DESIGN_ELEMENT)
		currentElement = ARRAY_DESIGN_ASSNLIST;
}

/*
 * Advance the current element.
 *
 * Moves currentElement to the state matching the element that just started.
 * Returns true when the state was changed to a recognised element and false
 * otherwise (null name, unrecognised element, or an element nested inside a
 * name/value element). The branches are tested in order: the nesting branch
 * at the end is only reached when none of the name based branches matched.
 */
bool SAXArrayHandlers::MoveCurrentElementForward(const XMLCh* const name)
{
	// A null name carries no information.
	if (name == 0)
		return false;

	// BioMaterial package branch.
	if (name == BIOMATERIAL_PACKAGE_ELEMENT)
		currentElement = BIOMATERIAL_PACKAGE;

	else if (name == BIOMATERIAL_ASSNLIST_ELEMENT)
		currentElement = BIOMATERIAL_ASSNLIST;

	else if (name == BIOSOURCE_ELEMENT)
		currentElement = BIOSOURCE;

	// The following element names occur under more than one parent, so the
	// current state is checked as well as the name.
	else if (name == PROPERTYSETS_ASSNLIST_ELEMENT && currentElement == BIOSOURCE)
		currentElement = BIOSOURCE_PROPERTYSETS_ASSNLIST;

	else if (name == NAME_VALUE_TYPE_ELEMENT && currentElement == BIOSOURCE_PROPERTYSETS_ASSNLIST)
		currentElement = BIOSOURCE_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE;

	else if (name == CHARACTERISTICS_ASSNLIST_ELEMENT && currentElement == BIOSOURCE)
		currentElement = BIOSOURCE_CHARACTERISTICS_ASSNLIST;

	else if (name == ONTOLOGY_ENTRY_ELEMENT && currentElement == BIOSOURCE_CHARACTERISTICS_ASSNLIST)
		currentElement = BIOSOURCE_CHARACTERISTICS_ASSNLIST_ONTOLOGY;

	else if (name == MATERIAL_TYPE_ASSN_ELEMENT && currentElement == BIOSOURCE)
		currentElement = BIOSOURCE_MATERIALTYPE_ASSNLIST;

	else if (name == ONTOLOGY_ENTRY_ELEMENT && currentElement == BIOSOURCE_MATERIALTYPE_ASSNLIST)
		currentElement = BIOSOURCE_MATERIALTYPE_ASSNLIST_ONTOLOGY;

	// Experiment package branch.
	else if (name == EXPERIMENT_PACKAGE_ELEMENT)
		currentElement = EXPERIMENT_PACKAGE;

	else if (name == EXPERIMENT_ASSNLIST_ELEMENT)
		currentElement = EXPERIMENT_ASSNLIST;

	else if (name == EXPERIMENT_ELEMENT)
		currentElement = EXPERIMENT;

	else if (name == PROPERTYSETS_ASSNLIST_ELEMENT && currentElement == EXPERIMENT)
		currentElement = EXPERIMENT_PROPERTYSETS_ASSNLIST;

	else if (name == NAME_VALUE_TYPE_ELEMENT && currentElement == EXPERIMENT_PROPERTYSETS_ASSNLIST)
		currentElement = EXPERIMENT_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE;

	// Array design package branch.
	else if (name == ARRAY_DESIGN_PACKAGE_ELEMENT)
		currentElement = ARRAY_DESIGN_PACKAGE;

	else if (name == ARRAY_DESIGN_ASSNLIST_ELEMENT)
		currentElement = ARRAY_DESIGN_ASSNLIST;

	else if (name == PHYSICAL_ARRAY_DESIGN_ELEMENT)
		currentElement = PHYSICAL_ARRAY_DESIGN;

	// Any other element that starts while a name/value element is current is
	// only counted, so that MoveCurrentElementBack can match its closing tag
	// without leaving the name/value state.
	else if (currentElement == BIOSOURCE_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE || currentElement == EXPERIMENT_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE)
	{
		++levelsDeep;
		return false;
	}

	// Unrecognised element: leave the state alone.
	else
		return false;

	return true;
}

/*
 * Set the current element based on the name and the current element.
 * Based on the current element, store the data in the array object.
 * Elements for which MoveCurrentElementForward returns false are ignored.
 */
void SAXArrayHandlers::startElement(const XMLCh* const name, AttributeList& attributes)
{
	if (MoveCurrentElementForward(name) == false)
		return;

	// Dispatch on the state that was just entered; states without attributes
	// of interest fall through to the default case.
	switch (currentElement)
	{
	case EXPERIMENT:
		StoreExperimentNameAttribute(attributes);
		break;

	case BIOSOURCE:
		StoreSampleNameAttribute(attributes);
		break;

	case BIOSOURCE_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE:
	case EXPERIMENT_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE:
		StoreNameValueAttributes(attributes);
		break;

	case BIOSOURCE_MATERIALTYPE_ASSNLIST_ONTOLOGY:
		StoreSampleTypeAttribute(attributes);
		break;

	case BIOSOURCE_CHARACTERISTICS_ASSNLIST_ONTOLOGY:
		StoreSampleProjectAttribute(attributes);
		break;

	case PHYSICAL_ARRAY_DESIGN:
		StoreArrayType(attributes);
		break;

	default:
		break;
	}
}

/*
 * Stores the array type.
 * The value of the first attribute named NAME_ATTRIBUTE is passed to
 * SetArrayType; nothing happens when no such attribute is present.
 */
void SAXArrayHandlers::StoreArrayType(AttributeList& attributes)
{
	unsigned int len = attributes.getLength();
	for (unsigned int index = 0; index < len; index++)
	{
		if (attributes.getName(index) == NAME_ATTRIBUTE)
		{
			arrayData->SetArrayType(XMLChToString(attributes.getValue(index)));
			// Only the first match is used.
			break;
		}
	}
}

/*
 * Stores the experiment name attribute from the experiment element.
 * The value of the first attribute named NAME_ATTRIBUTE is passed to
 * SetExperimentName; nothing happens when no such attribute is present.
 */
void SAXArrayHandlers::StoreExperimentNameAttribute(AttributeList& attributes)
{
	unsigned int len = attributes.getLength();
	for (unsigned int index = 0; index < len; index++)
	{
		if (attributes.getName(index) == NAME_ATTRIBUTE)
		{
			arrayData->SetExperimentName(XMLChToString(attributes.getValue(index)));
			// Only the first match is used.
			break;
		}
	}
}

/*
 * Stores the sample name attribute from the bio source element.
*/ void SAXArrayHandlers::StoreSampleNameAttribute(AttributeList& attributes) { unsigned int len = attributes.getLength(); for (unsigned int index = 0; index < len; index++) { if (attributes.getName(index) == NAME_ATTRIBUTE) { AttributeNameValueType param; param.name = GCOS_SAMPLE_NAME_PARAMETER_NAME; param.value = XMLChToString(attributes.getValue(index)); param.type = STRING_TYPE; arrayData->Attributes().push_back(param); break; } } } /* * Stores the sample type attribute from the biosource/materialtype element. */ void SAXArrayHandlers::StoreSampleTypeAttribute(AttributeList& attributes) { unsigned int len = attributes.getLength(); for (unsigned int index = 0; index < len; index++) { if (attributes.getName(index) == ONTOLOGY_VALUE_ATTRIBUTE) { AttributeNameValueType param; param.name = GCOS_SAMPLE_TYPE_PARAMETER_NAME; param.value = XMLChToString(attributes.getValue(index)); param.type = STRING_TYPE; arrayData->Attributes().push_back(param); break; } } } /* * Stores the sample project attribute from the biosource/characteristics element. */ void SAXArrayHandlers::StoreSampleProjectAttribute(AttributeList& attributes) { unsigned int len = attributes.getLength(); for (unsigned int index = 0; index < len; index++) { if (attributes.getName(index) == ONTOLOGY_VALUE_ATTRIBUTE) { AttributeNameValueType param; param.name = GCOS_SAMPLE_PROJECT_PARAMETER_NAME; param.value = XMLChToString(attributes.getValue(index)); param.type = STRING_TYPE; arrayData->Attributes().push_back(param); break; } } } /* * Stores the attributes from a name value type element. * Parse out the probe array type into its own member. 
*/ void SAXArrayHandlers::StoreNameValueAttributes(AttributeList& attributes) { unsigned int len = attributes.getLength(); AttributeNameValueType param; for (unsigned int index = 0; index < len; index++) { if (attributes.getName(index) == NAME_ATTRIBUTE) { param.name = XMLChToString(attributes.getValue(index)); } else if (attributes.getName(index) == VALUE_ATTRIBUTE) { param.value = XMLChToString(attributes.getValue(index)); } else if (attributes.getName(index) == TYPE_ATTRIBUTE) { param.type = XMLChToString(attributes.getValue(index)); if (param.name == PROBE_ARRAY_TYPE_PARAMETER_NAME) { arrayData->SetArrayType(param.value); } else { arrayData->Attributes().push_back(param); } } } } affxparser/src/fusion/file/SAXDttArrayHandlers.h0000644000175200017520000001561214516003651022714 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _SAXDttArrayHandlers_HEADER_ #define _SAXDttArrayHandlers_HEADER_ /*! \file SAXDttArrayHandlers.h This file provides SAX parsing handles for array files. */ #include "file/DttArrayFileReader.h" // #include #include #include // #include #include #include // namespace affymetrix_dttarray { /*! 
The probe array type parameter name. */ #define PROBE_ARRAY_TYPE_PARAMETER_NAME std::string("Probe Array Type") /*! The name of the biomaterial package element. */ #define BIOMATERIAL_PACKAGE_ELEMENT std::wstring(L"BioMaterial_package") /*! The name of the biomaterial association list element. */ #define BIOMATERIAL_ASSNLIST_ELEMENT std::wstring(L"BioMaterial_assnlist") /*! The name of the biosource list element. */ #define BIOSOURCE_ELEMENT std::wstring(L"BioSource") /*! The name of the "name" attribute. */ #define NAME_ATTRIBUTE std::wstring(L"name") /*! The name of the "value" attribute. */ #define VALUE_ATTRIBUTE std::wstring(L"value") /*! The name of the "type" attribute. */ #define TYPE_ATTRIBUTE std::wstring(L"type") /*! The value of the "type" attribute for sample info. */ #define STRING_TYPE std::string("string") /*! The name of the property sets association list element. */ #define PROPERTYSETS_ASSNLIST_ELEMENT std::wstring(L"PropertySets_assnlist") /*! The name of the name/value type element. */ #define NAME_VALUE_TYPE_ELEMENT std::wstring(L"NameValueType") /*! The name of the characteristics association list element. */ #define CHARACTERISTICS_ASSNLIST_ELEMENT std::wstring(L"Characteristics_assnlist") /*! The name of the material type association element. */ #define MATERIAL_TYPE_ASSN_ELEMENT std::wstring(L"MaterialType_assn") /*! The name of the ontology entry element. */ #define ONTOLOGY_ENTRY_ELEMENT std::wstring(L"OntologyEntry") /*! The name of the category attribute. */ #define ONTOLOGY_CATEGORY_ATTRIBUTE std::wstring(L"category") /*! The name of the value attribute. */ #define ONTOLOGY_VALUE_ATTRIBUTE std::wstring(L"value") /*! The experiment package element. */ #define EXPERIMENT_PACKAGE_ELEMENT std::wstring(L"Experiment_package") /*! The experiment association list element. */ #define EXPERIMENT_ASSNLIST_ELEMENT std::wstring(L"Experiment_assnlist") /*! The experiment element. */ #define EXPERIMENT_ELEMENT std::wstring(L"Experiment") /*! 
The name of the array design package element. */
#define ARRAY_DESIGN_PACKAGE_ELEMENT std::wstring(L"ArrayDesign_package")

/*! The name of the array design association list. */
#define ARRAY_DESIGN_ASSNLIST_ELEMENT std::wstring(L"ArrayDesign_assnlist")

/*! The name of the physical array design. */
#define PHYSICAL_ARRAY_DESIGN_ELEMENT std::wstring(L"PhysicalArrayDesign")

/*! Enumerants to hold the elements in a dtt array file.
 *
 * Each value names one position in the element hierarchy and is used as the
 * parser state of SAXArrayHandlers. DTT_ARRAY_FILE is the initial state.
 */
typedef enum {
	DTT_ARRAY_FILE,
	BIOMATERIAL_PACKAGE,
	BIOMATERIAL_ASSNLIST,
	BIOSOURCE,
	BIOSOURCE_PROPERTYSETS_ASSNLIST,
	BIOSOURCE_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE,
	BIOSOURCE_CHARACTERISTICS_ASSNLIST,
	BIOSOURCE_CHARACTERISTICS_ASSNLIST_ONTOLOGY,
	BIOSOURCE_MATERIALTYPE_ASSNLIST,
	BIOSOURCE_MATERIALTYPE_ASSNLIST_ONTOLOGY,
	EXPERIMENT_PACKAGE,
	EXPERIMENT_ASSNLIST,
	EXPERIMENT,
	EXPERIMENT_PROPERTYSETS_ASSNLIST,
	EXPERIMENT_PROPERTYSETS_ASSNLIST_NAMEVALUETYPE,
	ARRAY_DESIGN_PACKAGE,
	ARRAY_DESIGN_ASSNLIST,
	PHYSICAL_ARRAY_DESIGN
} DttArrayFileElements;

/*! This class provides the SAX handlers for reading calvin array files.
 *
 * The handlers keep a small state machine (currentElement) that follows the
 * element hierarchy listed in DttArrayFileElements and copy selected
 * attributes into the DttArrayData object passed to the constructor.
 */
class SAXArrayHandlers : public XERCES_CPP_NAMESPACE::HandlerBase
{
private:
	/*! Used to determine the number of levels past the expected name/value type element. */
	int levelsDeep;

	/*! A pointer to the array object. */
	DttArrayData *arrayData;

	/*! The parent element that is currently being processed. */
	DttArrayFileElements currentElement;

	/*! Stores the experiment name attribute from the experiment element.
	 * @param attributes The name/value attributes.
	 */
	void StoreExperimentNameAttribute(XERCES_CPP_NAMESPACE::AttributeList& attributes);

	/*! Stores the sample name attribute from the bio source element.
	 * @param attributes The name/value attributes.
	 */
	void StoreSampleNameAttribute(XERCES_CPP_NAMESPACE::AttributeList& attributes);

	/*! Stores the sample type attribute from the biosource/materialtype element.
	 * @param attributes The name/value attributes.
	 */
	void StoreSampleTypeAttribute(XERCES_CPP_NAMESPACE::AttributeList& attributes);

	/*! Stores the sample project attribute from the biosource/characteristics element.
	 * @param attributes The name/value attributes.
	 */
	void StoreSampleProjectAttribute(XERCES_CPP_NAMESPACE::AttributeList& attributes);

	/*! Stores the attributes from a name value type element.
	 * @param attributes The name/value attributes.
	 */
	void StoreNameValueAttributes(XERCES_CPP_NAMESPACE::AttributeList& attributes);

	/*! Stores the array type from the physical array design element.
	 * @param attributes The name/value attributes.
	 */
	void StoreArrayType(XERCES_CPP_NAMESPACE::AttributeList& attributes);

	/*! Modifies the state machine given the end of an element.
	 *
	 * @param name The name of the element.
	 */
	void MoveCurrentElementBack(const XMLCh* const name);

	/*! Modifies the state machine given the start of an element.
	 *
	 * @param name The name of the element.
	 * @return True if a valid element was found.
	 */
	bool MoveCurrentElementForward(const XMLCh* const name);

public:
	/*! Constructor
	 *
	 * @param data The array data.
	 */
	SAXArrayHandlers(DttArrayData *data);

	/*! Destructor */
	~SAXArrayHandlers();

	/*! Called at the start of the document */
	void startDocument();

	/*! Called at the end of the document */
	void endDocument();

	/*! Called at the start of each element.
	 *
	 * @param name The name of the element.
	 * @param attributes The attributes of the element.
	 */
	void startElement(const XMLCh* const name, XERCES_CPP_NAMESPACE::AttributeList& attributes);

	/*! Called at the end of each element.
	 *
	 * @param name The name of the element.
	 */
	void endElement(const XMLCh* const name);
};

}; // namespace affymetrix_dttarray

#endif // _SAXDttArrayHandlers_HEADER_
affxparser/src/fusion/file/SMDFileData.cpp0000644000175200017520000001152014516003651021467 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _MSC_VER #define _CRT_SECURE_NO_WARNINGS #endif // #include "file/SMDFileData.h" // #include #ifdef _MSC_VER #define snprintf _snprintf #endif /*! INI file summary section name. */ static const std::string SummarySec = "SUMMARY"; /*! INI file key name for the number of frames. */ static const std::string Frames = "NumberOfFrames"; /*! INI file key name for the number of frames per row or frame columns. */ static const std::string FrameCols = "FramesPerRow"; /*! INI file key name for the number of frames per column or frame rows. */ static const std::string FrameRows = "FramesPerCol"; /*! INI file key name for the number of cell rows in the frame. */ static const std::string KeyRows = "Rows"; /*! INI file key name for the number of cell columns in the frame. */ static const std::string KeyCols = "Cols"; /*! INI file key name for the cell row index of the start of the frame. */ static const std::string KeyStartRow = "StartRow"; /*! INI file key name for the cell column index of the start of the frame. */ static const std::string KeyStartCol = "StartCol"; using namespace affxsmd; /* * Constructor */ SMDFrame::SMDFrame() { // Initialize to an invalid frame. 
frameIdx = -1; rows = -1; cols = -1; startRow = -1; startCol = -1; } /* * Constructor */ SMDFileData::SMDFileData() { Clear(); } /* * Destructor */ SMDFileData::~SMDFileData() { } /* * Check for the file existance */ bool SMDFileData::Exists() { // Find the file stats. struct stat st; return (stat(filename.c_str(), &st) == 0); } /* * Get the SMDFrame information for a frame. */ SMDFrame SMDFileData::GetFrame(unsigned int frameIdx) { if (frameIdx >= 0 && frameIdx < smdFrames.size()) { return smdFrames[frameIdx]; } else { SMDFrame invalidFrame; return invalidFrame; } } /* * Read the SMD file. */ bool SMDFileData::Read() { if (filename.length() == 0) return false; isRead = ReadData(); return isRead; } /* * Read the SMD data. */ bool SMDFileData::ReadData() { Clear(); CIniFile iniFile; std::vector allRecords = iniFile.GetAllRecords(filename); if (allRecords.size() == 0) // Either the file does not exist or is not the correct format. return false; int numFrames = 0; if (GetIntValue(allRecords, SummarySec, Frames, numFrames) == false) return false; if (numFrames < 1) return false; int numRows = 0; if (GetIntValue(allRecords, SummarySec, FrameRows, numRows) == false) return false; int numCols = 0; if (GetIntValue(allRecords, SummarySec, FrameCols, numCols) == false) return false; frameRows = numRows; frameCols = numCols; for (int iFrame = 0; iFrame < numFrames; ++iFrame) { SMDFrame frame; char sectionName[10]; snprintf(sectionName,sizeof(sectionName), "%d", iFrame); frame.frameIdx = iFrame; if (GetIntValue(allRecords, sectionName, KeyRows, frame.rows) == false) return false; if (GetIntValue(allRecords, sectionName, KeyCols, frame.cols) == false) return false; if (GetIntValue(allRecords, sectionName, KeyStartRow, frame.startRow) == false) return false; if (GetIntValue(allRecords, sectionName, KeyStartCol, frame.startCol) == false) return false; smdFrames.push_back(frame); } return true; } /* * Clear all members except filename. 
*/ void SMDFileData::Clear() { smdFrames.clear(); frameRows = 0; frameCols = 0; isRead = false; } /* * Find a Record in a collection. * Move into CIniFile? */ CIniFile::Record* SMDFileData::FindRecord(std::vector& records, std::string section, std::string key) { for (std::vector::iterator ii = records.begin(); ii != records.end(); ++ii) { if (ii->Key == key && ii->Section == section) return &(*ii); } return 0; } /* * Get the integer value of a key. */ bool SMDFileData::GetIntValue(std::vector& records, std::string section, std::string key, int& value) { CIniFile::Record* record = FindRecord(records, section, key); if (record == 0) return false; value = atoi(record->Value.c_str()); return true; } affxparser/src/fusion/file/SMDFileData.h0000644000175200017520000000751214516003651021142 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /*! \file SMDFileData.h This file provides methods for reading a SMD file. */ #ifndef _SMDFILEDATA_H_ #define _SMDFILEDATA_H_ #include "file/IniFile.h" // #include #include #include // namespace affxsmd { /*! This class contains the frame information for each frame */ class SMDFrame { public: /*! Constructor */ SMDFrame(); /*! 
Frame index. */ int frameIdx; /*! Number of cell rows in the frame. */ int rows; /*! Number of cell columsn in the frame. */ int cols; /*! Cell row index of the start of the frame. */ int startRow; /*! Cell column index of the start of the frame. */ int startCol; }; /*! This class reads a SMD file. */ class SMDFileData { public: /*! Constructor */ SMDFileData(); /*! Destructor */ virtual ~SMDFileData(); /*! Sets the file name. * @param name The full path to the SMD file. */ void SetFileName (const char *name) { filename = name; } /*! Gets the file name. * @return The full path to the SMD file. */ const std::string GetFileName() const { return filename; } /*! Checks for the existance of the file passed to SetFileName. * @return True if the file exists. */ bool Exists(); /*! Read the SMD file. * @return Returns true if the file was successfully read. */ bool Read(); /*! Get the SMDFrame information for a frame. * @param frameIdx The frame index. * @return A SMDFrame with information of the frame indicated by the frameIdx parameter. */ SMDFrame GetFrame(unsigned int frameIdx); /*! Get the number of frames in the SMD. * @return Number of frames. */ unsigned int NumFrames() { return (unsigned int)smdFrames.size(); }; /*! The number of frames rows in the SMD (frames per column) */ unsigned int frameRows; /*! The number of frame columns in the SMD (frames per row) */ unsigned int frameCols; protected: /*! Read the SMD data. * @return Returns true if the SMD data was successfully read. */ bool ReadData(); /*! Clear all members except filename */ void Clear(); /*! Find a Record in a collection. * @param records A collection of Records. * @param section The section name of the Record to find. * @param key The key name of the Record to find. * @return A Record that matches the criteria, otherwise 0. */ CIniFile::Record* FindRecord(std::vector& records, std::string section, std::string key); /*! Get the integer value of a key. * @param records A collection of Records. 
* @param section The section name of the value. * @param key The key name of the value. * @param value The value found. * @return Returns true if the value was found in the collection. */ bool GetIntValue(std::vector& records, std::string section, std::string key, int& value); /*! A flag indicating if a SMD has been read. */ bool isRead; /*! SMD file name. Including the relation or full path. */ std::string filename; /*! A collection of SMDFrame information for all frames in the SMD. */ std::vector smdFrames; }; } #endif // _SMDFILEDATA_H_ affxparser/src/fusion/file/SimpleBinaryFile.cpp0000644000175200017520000000710514516003651022654 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/SimpleBinaryFile.h" // #include "file/FileIO.h" #include "file/FileWriter.h" // SimpleBinaryFile::SimpleBinaryFile() { } SimpleBinaryFile::~SimpleBinaryFile() { } bool SimpleBinaryFile::CreateNewFile(const std::string &fileName) { outFile.open(fileName.c_str(), std::ios::out | std::ios::binary); return (outFile ? 
true : false); } void SimpleBinaryFile::WriteHeader(int rows, int cols, const std::string &rowHeaderLabel, int rowHeaderLabelMaxLength, const std::vector &colIds, const std::vector &colNames) { rowHeaderLength = rowHeaderLabelMaxLength; WriteInt32_N(outFile, rows); WriteInt32_N(outFile, cols); WriteString_N(outFile, rowHeaderLabel); WriteInt32_N(outFile, rowHeaderLength); for (int icol=0; icol &values) { WriteRowLabel(rowLabel); for (std::vector::const_iterator it=values.begin(); it!=values.end(); ++it) WriteFloat_N(outFile, *it); } void SimpleBinaryFile::WriteRowLabel(const std::string &rowLabel) { WriteFixedString(outFile, rowLabel, rowHeaderLength); } void SimpleBinaryFile::WriteData(float data) { WriteFloat_N(outFile, data); } void SimpleBinaryFile::UpdateRowCount(const std::string &fileName, int rows) { std::ofstream fileStr; fileStr.open(fileName.c_str(), std::ios::in | std::ios::out | std::ios::binary); WriteInt32_N(fileStr, rows); fileStr.close(); } bool SimpleBinaryFile::OpenFile(const std::string &fileName, int &rows, int &cols, std::string &rowHeaderLabel, std::vector &colNames) { inFile.open(fileName.c_str(), std::ios::in | std::ios::binary); if (!outFile) return false; ReadInt32_N(inFile, rows); ReadInt32_N(inFile, cols); ReadString_N(inFile, rowHeaderLabel); ReadInt32_N(inFile, rowHeaderLength); colNames.resize(cols); for (int icol=0; icol &data) { inFile.seekg(dataStartPos + row*rowSize); ReadFixedString(inFile, rowLabel, rowHeaderLength); for (int i=0; i<(int)data.size(); i++) ReadFloat_N(inFile, data[i]); } affxparser/src/fusion/file/SimpleBinaryFile.h0000644000175200017520000000725614516003651022330 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _SimpleBinaryFile_HEADER_ #define _SimpleBinaryFile_HEADER_ /** * @file SimpleBinaryFile.h Provides interfaces to create a simple binary file * storing a table of data. */ // #include #include #include #include // /*! Used to create a simple binary file storing a table of data. */ class SimpleBinaryFile { public: /*! Constructor */ SimpleBinaryFile(); /*! Destructor */ ~SimpleBinaryFile(); /*! Create a new file. * @param fileName The file name. * @return True if created. */ bool CreateNewFile(const std::string &fileName); /*! Write the header * @param rows The number of rows * @param cols The number of columns of data (not including the row label. * @param rowHeaderLabel The label for the row header. * @param rowHeaderLabelMaxLength The maximum length of the row header values * @param colIds A unique id for each column * @param colNames The names of each column */ void WriteHeader(int rows, int cols, const std::string &rowHeaderLabel, int rowHeaderLabelMaxLength, const std::vector &colIds, const std::vector &colNames); /*! Write a row of data. * @param rowLabel The label for the row * @param values The column data */ void WriteRow(const std::string &rowLabel, const std::vector &values); /*! Write a row label. * @param rowLabel The label for the row */ void WriteRowLabel(const std::string &rowLabel); /*! Write a data point. * @param data The data to write. */ void WriteData(float data); /*! Close the file and check the status. 
*/ bool Close(); /*! Update the row count. * @param fileName The name of the file. * @param rows The row count. */ void UpdateRowCount(const std::string &fileName, int rows); /*! Open an existing file. * @param fileName The file name. * @param rows The number of rows * @param cols The number of columns of data (not including the row label. * @param rowHeaderLabel The label for the row header. * @param colNames The names of each column * @return True if successful. */ bool OpenFile(const std::string &fileName, int &rows, int &cols, std::string &rowHeaderLabel, std::vector &colNames); /*! Read a row of data * @param row The row index * @param rowLabel The row label * @param data The data */ void ReadRow(int row, std::string &rowLabel, std::vector &data); private: /*! The output stream. */ std::ofstream outFile; /*! The input stream. */ std::ifstream inFile; /*! The length of the row headers. */ int rowHeaderLength; /*! The position where the data starts. */ int dataStartPos; /*! The size of each row. */ int rowSize; }; #endif affxparser/src/fusion/file/TagValuePair.h0000644000175200017520000000407314516003651021450 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _TagValuePair_HEADER_ #define _TagValuePair_HEADER_ /*! \file TagValuePair.h This file provides types to hold tag/value parameters. */ #include #include #include #include // /*! Defines a name/value parameter. */ typedef struct _TagValuePairType { /*! The name (tag) of the parameter. */ std::string Tag; /*! The value of the parameter. */ std::string Value; /*! Assignment operator. * @param vp The parameter to copy. * @return The copied parameter. */ _TagValuePairType operator=(_TagValuePairType vp) { Tag = vp.Tag; Value = vp.Value; return *this; } /*! Comparison operator. * @param vp The parameter to compare. * @return True if the tag values are the same. */ bool operator==(_TagValuePairType vp) { if (vp.Tag == Tag) return true; return false; } /*! Comparison operator. * @param tag The tag to compare. * @return True if the tag values are the same. */ bool operator==(const char *tag) { if (Tag == tag) return true; return false; } } TagValuePairType; /*! An STL vector of parameters. */ typedef std::vector TagValuePairTypeVector; /*! An STL list of tag/value parameters */ typedef std::list TagValuePairTypeList; #endif affxparser/src/fusion/file/TsvFile/0000755000175200017520000000000014516022540020321 5ustar00biocbuildbiocbuildaffxparser/src/fusion/file/TsvFile/BgpFile.cpp0000644000175200017520000001601514516003651022342 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * \file BgpFile.cpp * \brief Definition of the BGP file profile * Read \link file-format-tsv the TsvFile docs \endlink more about TsvFile. */ /** \page file-format-bgp File Format: BGP (NON-OFFICIAL-RELEASE)
DRAFT
The BGP (background probe file) lists which probes to use for background correction. Currently this file is used with the PM-GCBG pm adjustment method implemented in apt-probeset-summarize. The BGP file is based on version 1 of the \link file-format-tsv TSV file format \endlink. \section specs Specs & Features - Required headers - chip_type: indicates the chip type (as stored in the CEL file) which the BGP file describes. Multiple chip_type headers may be present indicating the BGP file can be used with any one of the listed chip types. - lib_set_name: indicates the name of a collection of related library files for a given chip. For example, PGF and CLF files intended to be used together should have the same lib_set_name. Only a single lib_set_name is allowed. - lib_set_version: indicates the version of a collection of related library files for a given chip. For example, PGF and CLF files intended to be used together should have the same lib_set_version. Only a single lib_set_version is allowed. - Optional headers - create_date: timestamp for when the file was created - guid: a unique identifier for the file - other headers may be present - Required columns - probe_id: the ID of the probe to use for background correction - Optional columns - probeset_id: The ID of the probeset in the PGF file to which the probe belongs. When using a bgp file with a CDF file, the probe ID is in column major order starting at 1. See \link VIGNETTE-expression-mask-probes \endlink for more details.
- probeset_type: See type field at probeset level in PGF documentation - atom_id: The ID of the atom to which the probe belongs to - probe_type: See type field at the probe level in PGF documentation - gc_count: The numbers of G's and C's in the probe - probe_length: The length of the probe in base pairs - probe_sequence: See the probe_sequence info in the PGF documentation - x: The column coordinate of the probe in the CEL file - y: The row coordinate of the probe in the CEL file - other columns may be present \section implementation Parsing and Writing The official C++ parser used by affy can be found in APT under sdk/file/TsvFile/BgpFile.h. When possible, parsing and writing of BGP files should be done using this code. \section notes Notes The optional fields listed above are frequently present. This is in part due to the fact that apt-dump-pgf is used to generate the BGP files. \section bgpExample1 Example 1 \verbatim #%chip_type=HuEx-1_0-st-v2 #%chip_type=HuEx-1_0-st-v1 #%chip_type=HuEx-1_0-st-ta1 #%lib_set_name=HuEx-1_0-st #%lib_set_version=r2 #%create_date=Tue Sep 19 15:18:05 PDT 2006 #%guid=0000008635-1158704285-2136082293-1007618462-0634896595 probeset_id probeset_type atom_id probe_id probe_type gc_count probe_length probe_sequence x y 4057134 control->bgp->antigenomic 70013 78973 pm:st 4 25 TATTAAAATTAGAGCTATTAACAAT 2172 30 4057134 control->bgp->antigenomic 70013 125043 pm:st 4 25 AACTTATCTAAATGCTATTAAAAAT 2162 48 4057134 control->bgp->antigenomic 70013 148149 pm:st 4 25 TGATATAATAAGCTAATTTGTTTAA 2228 57 4057134 control->bgp->antigenomic 70013 152379 pm:st 4 25 ATTATAATAGTTAGCAACTTAATAT 1338 59 4057134 control->bgp->antigenomic 70013 178557 pm:st 4 25 CATTTATTAGTATTAAAGCAAATAT 1916 69 4057134 control->bgp->antigenomic 70013 178785 pm:st 4 25 AATTATTAAAACAAGCTTATATATC 2144 69 \endverbatim \section related Related Pages - TsvFile Design Notes - TSV File Format */ #include "file/TsvFile/BgpFile.h" // declare the namespacen namespace affx { /// @brief Create 
the BgpFile object BgpFile::BgpFile() { clear(); makeBgpBindings(); } /// @brief Destroy a BgpFile object BgpFile::~BgpFile() { } /// @brief Reset every per-row member to zero or the empty string void BgpFile::clear() { probe_id=0; probeset_id_1=0; atom_id=0; exon_position=0; gc_count=0; interrogation_position=0; probe_length=0; probe_sequence=""; probeset_id_2=0; probeset_name=""; type=""; x=0; y=0; } /// @brief Make the bindings which define a BGP file void BgpFile::makeBgpBindings() { m_tsv.unbindAll(); // m_tsv.bind(0, "probe_id", &probe_id, TSV_BIND_REQUIRED); m_tsv.bind(0, "probeset_id", &probeset_id_1); m_tsv.bind(0, "atom_id", &atom_id); m_tsv.bind(0, "exon_position", &exon_position); m_tsv.bind(0, "gc_count", &gc_count); m_tsv.bind(0, "interrogation_position", &interrogation_position); m_tsv.bind(0, "probe_length", &probe_length); m_tsv.bind(0, "probe_sequence", &probe_sequence); //m_tsv.bind(0,8,&probeset_id_2); m_tsv.bind(0, "probeset_name", &probeset_name); m_tsv.bind(0, "type", &type); m_tsv.bind(0, "x", &x); m_tsv.bind(0, "y", &y); } /// @brief Open a BGP file /// @param filename the pathname of the file /// @returns tsv_return_t int BgpFile::open(const std::string& filename) { return m_tsv.open(filename); } /// @brief Start the write to a file /// @param filename the pathname of the file to write /// @returns tsv_return_t int BgpFile::write(const std::string& filename) { return m_tsv.writeTsv(filename); } /// @brief Close a BGP file /// @returns tsv_return_t int BgpFile::close() { return m_tsv.close(); } /// @brief Rewind back to the start of the BGP data /// @returns tsv_return_t int BgpFile::rewind() { return m_tsv.rewind(); } /// @brief Skip to the next probe /// @returns TSV_LEVEL_LAST when out of probesets int BgpFile::next_bgprobe() { return m_tsv.nextLevel(0); } /// @brief Define a tsv file as a BGP file.
/// @param tsv pointer to tsv to define /// @returns tsv_return_t int BgpFile::defineFileBgp(TsvFile* tsv) { assert(tsv != NULL); return tsv->defineFile("probe_id\tprobeset_id\tatom_id\texon_position\t" "gc_count\tinterrogation_position\tprobe_length\t" "probe_sequence\tprobeset_id\tprobeset_name\ttype\tx\ty"); } /// @brief Define this file as a pgf file /// @returns tsv_return_t int BgpFile::defineFileBgp() { return defineFileBgp(&m_tsv); } }; affxparser/src/fusion/file/TsvFile/BgpFile.h0000644000175200017520000000542114516003651022006 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * \file BgpFile.h * \brief Definition of the BGP file profile * Read \link file-format-tsv the TsvFile docs \endlink more about TsvFile. 
*/ #ifndef _BGP_FILE_H #define _BGP_FILE_H // #include "file/TsvFile/TsvFile.h" // #include // // declare the namespace namespace affx { class BgpFile; }; /// To be called a BGP file, the file requires class affx::BgpFile { public: affx::TsvFile m_tsv; ///< public so people can work with it //#%header0=probset_id probe_id int probe_id; int probeset_id_1; int atom_id; int exon_position; int gc_count; int interrogation_position; int probe_length; std::string probe_sequence; int probeset_id_2; std::string probeset_name; std::string type; int x; int y; /// @brief Create the BgpFile object BgpFile(); /// @brief Destroy a BgpFile object ~BgpFile(); /// @brief Clears all the member vars. void clear(); /// @brief Make the bindings which define a BGP file void makeBgpBindings(); /// @brief Open a BGP file /// @param filename the pathname of the file /// @returns tsv_return_t int open(const std::string& filename); /// @brief Start the write to a file /// @param filename /// @returns tsv_return_t int write(const std::string& filename); /// @brief Close a PGF file /// @returns tsv_return_t int close(); /// @brief Rewind back to the start of the BGP data /// @returns tsv_return_t int rewind(); /// @brief Skip to the next probe /// @returns TSV_LEVEL_LAST when out of probesets int next_bgprobe(); /// @brief Define a tsv file as a BGP file. /// @param tsv pointer to tsv to define /// @returns tsv_return_t int defineFileBgp(TsvFile* tsv); /// @brief Define this file as a pgf file /// @returns tsv_return_t int defineFileBgp(); }; #endif // TsvFile affxparser/src/fusion/file/TsvFile/ClfFile.cpp0000644000175200017520000002044714516003651022342 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file ClfFile.cpp * @brief * Read \link file-format-clf the ClfFile format \endlink for an overview. * Read \link file-format-tsv the TsvFile docs \endlink more about TsvFile. */ #include "file/TsvFile/ClfFile.h" /** \page file-format-clf File Format: CLF (NON-OFFICIAL-RELEASE)
DRAFT
The CLF (cel layout file) maps probe IDs to a particular location in the CEL file. The CLF file is based on version 2 of the TSV file format. The CLF file is based on version 1 of the \link file-format-tsv TSV file format \endlink. \section specs Specs & Features - Required headers - chip_type: indicates the chip type (as stored in the CEL file) which the clf file describes. Multiple chip_type headers may be present indicating the clf file can be used with any one of the listed chip types. - lib_set_name: indicates the name of a collection of related library files for a given chip. For example, PGF and CLF files intended to be used together should have the same lib_set_name. Only a single lib_set_name is allowed. - lib_set_version: indicates the version of a collection of related library files for a given chip. For example, PGF and CLF files intended to be used together should have the same lib_set_version. Only a single lib_set_version is allowed. - clf_format_version: currently the only documented and supported version is 1.0. - rows: the number of rows in the CEL file (1 base) - cols: the number of cols in the CEL file (1 base) - header0: indicates the values in the CLF file. A CLF file must contain probe_id, x, and y columns. The probe_id column must be unique. Other columns may be present. - Optional headers - sequential: When present, this header indicates that the relationship between probe IDs and x/y location is deterministic. The value of this key is the starting ID. See note below about Deterministic IDs. If sequential is specified, then the order header must also be supplied. The value must be an integer >= 0. - order: Indicates whether the IDs are enumerated in a column or row major order. Valid values are "col_major" and "row_major". If order is specified, then sequential must also be specified. 
- create_date: timestamp for when the file was created - guid: a unique identifier for the file - other headers may be present - Required columns - per TSV format, order of columns is not guaranteed - per TSV format, additional columns may be present - probe_id: A positive integer ID for the probe. Must be unique within the CLF file. - x: the x position of the probe in the CEL file. (0 base) - y: the y position of the probe in the CEL file. (0 base) \section deterministic Deterministic IDs In many cases, probe IDs are simply enumerated starting with a fixed number by walking through the CEL file in a column or row major order. When this is the case, hints may be provided in the headers which allow the location to be computed from the probe_id. This saves the work of reading the file. When sequential is specified along with col_major order, the following determines probe IDs: \code probe_id = y*cols + x + sequential \endcode And for row_major: \code probe_id = x*rows + y + sequential \endcode \section implementation Parsing and Writing The official C++ parser used by affy can be found in APT under sdk/file/TsvFile/ClfFile.h. When possible, parsing and writing of CLF files should be done using this code. \section notes Notes ChipLayout used by APT (in sdk/chipstream) will currently only accept CLF files for which: - sequential=1 - order=col_major - order=row_major In fact all the coordinates are done as row_major and the library files with col_major in the header are due to a bug. Please use row_major going forward. \section clfExample1 Example 1 \verbatim #%chip_type=HuEx-1_0-st-v2 #%chip_type=HuEx-1_0-st-v1 #%chip_type=HuEx-1_0-st-ta1 #%lib_set_name=HuEx-1_0-st #%lib_set_version=r2 #%create_date=Tue Sep 19 15:18:05 PDT 2006 #%guid=0000008635-1158704285-0732263232-1857033251-0689718480 #%clf_format_version=1.0 #%rows=2560 #%cols=2560 #%sequential=1 #%order=col_major #%header0=probe_id x y 1 0 0 2 1 0 3 2 0 4 3 0 5 4 0 6 5 0 7 6 0 ...
\endverbatim \section related Related Pages - TsvFile Design Notes - TSV File Format */ namespace affx { /// @brief Create the ClfFile object ClfFile::ClfFile() { makeClfBindings(); } /// @brief Destroy a ClfFile object ClfFile::~ClfFile() { } /// int ClfFile::getXMax() { int val; int rv; rv = m_tsv.getHeader("rows", val); Err::check(rv == TSV_OK, "No 'rows' header found in clf file"); return val -1; // 1-Based to 0-based } int ClfFile::getYMax() { int val; int rv; rv = m_tsv.getHeader("cols", val); Err::check(rv == TSV_OK, "No 'cols' header found in clf file"); return val -1; // 1-Based to 0-based } /// @brief Return the value of the sequential header /// @return int value of the header or -1 if not present int ClfFile::getSequential() { int val; int rv; rv = m_tsv.getHeader("sequential", val); if (rv == TSV_OK) { Err::check(val >= 0, "sequential header in clf file must be a value >= 0"); return val; } else return -1; } /// @brief Return the value of the order header /// @return the value of the order header if present std::string ClfFile::getOrder() { std::string val; int rv; rv = m_tsv.getHeader("order", val); if (rv == TSV_OK) { Err::check(val.compare("row_major") == 0 || val.compare("col_major") == 0, "order header in clf file must be set to 'row_major' (old mislabeled 'col_major' is tolerated due to old bug)"); return val; } else { val.erase(); return val; } } /// @brief Make the bindings which define a CLF file void ClfFile::makeClfBindings() { m_tsv.unbindAll(); // m_tsv.bind(0, "probe_id", &probe_id, TSV_BIND_REQUIRED); m_tsv.bind(0, "x", &x, TSV_BIND_REQUIRED); m_tsv.bind(0, "y", &y, TSV_BIND_REQUIRED); } /// @brief Open a CLF file /// @param filename the pathname of the file /// @return tsv_return_t int ClfFile::open(const std::string& filename) { std::string val; int rc = m_tsv.open(filename); if (rc == TSV_OK) { int rv = m_tsv.getHeader("clf_format_version", val); Err::check(rv == TSV_OK, "No clf_format_version header in clf file"); 
Err::check(val.compare("1.0") == 0, "Unable to handle clf file format"); } return rc; } /// @brief Start the write to a file /// @param filename /// @return tsv_return_t int ClfFile::write(const std::string& filename) { return m_tsv.writeTsv(filename); } /// @brief Close a PGF file /// @return tsv_return_t int ClfFile::close() { return m_tsv.close(); } /// @brief Rewind back to the start of the CLF data /// @return tsv_return_t int ClfFile::rewind() { return m_tsv.rewind(); } /// @brief Skip to the next probe /// @return TSV_LEVEL_LAST when out of probesets int ClfFile::next_probe() { return m_tsv.nextLevel(0); } /// @brief Define a tsv file as a CLF file. /// @param tsv pointer to tsv to define /// @return tsv_return_t int ClfFile::defineFileClf(TsvFile* tsv) { Err::check(tsv != NULL, "affx::ClfFile - no tsv file provided to defineFileClf"); return tsv->defineFile("probe_id\tx\ty"); } /// @brief Define this file as a pgf file /// @return tsv_return_t int ClfFile::defineFileClf() { return defineFileClf(&m_tsv); } }; affxparser/src/fusion/file/TsvFile/ClfFile.h0000644000175200017520000000574014516003651022006 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file ClfFile.h * @brief Headers for the CLF file class * Read \link file-format-clf the ClfFile format \endlink for an overview. * Read \link file-format-tsv the TsvFile docs \endlink more about TsvFile. */ #ifndef _CLF_FILE_H #define _CLF_FILE_H // #include "file/TsvFile/TsvFile.h" // #include "util/Err.h" // namespace affx { class ClfFile; }; /// To be called a CLF file, the file requires class affx::ClfFile { public: affx::TsvFile m_tsv; ///< public so people can work with it /// If you want the string forms, use "clf->tsv->get(0,"probe_id",probe_id_string)" //#%header0=probe_id x y int probe_id; ///< The probe id int x; ///< The x position of the probe int y; ///< The y position of the probe /// @brief Create the ClfFile object ClfFile(); /// @brief Destroy a ClfFile object ~ClfFile(); /// int getXMax(); int getYMax(); /// @brief Return the value of the sequential header /// @return int value of the header or -1 if not present int getSequential(); /// @brief Return the value of the order header /// @return the value of the order header if present std::string getOrder(); /// @brief Make the bindings which define a CLF file void makeClfBindings(); /// @brief Open a CLF file /// @param filename the pathname of the file /// @return tsv_return_t int open(const std::string& filename); /// @brief Start the write to a file /// @param filename /// @return tsv_return_t int write(const std::string& filename); /// @brief Close a PGF file /// @return tsv_return_t int close(); /// @brief Rewind back to the start of the CLF data /// @return tsv_return_t int rewind(); /// @brief Skip to the next probe /// @return TSV_LEVEL_LAST when out of probesets int next_probe(); /// @brief Define a tsv 
file as a CLF file. /// @param tsv pointer to tsv to define /// @return tsv_return_t int defineFileClf(TsvFile* tsv); /// @brief Define this file as a pgf file /// @return tsv_return_t int defineFileClf(); }; #endif // TsvFile affxparser/src/fusion/file/TsvFile/DumpPgf.cpp0000644000175200017520000005266514516003651022407 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /// @file DumpPgf.cpp /// @brief Class for dumping information from a pgf and clf file. 
#include "file/TsvFile/DumpPgf.h" // #include "util/Fs.h" #include "util/Guid.h" #include "util/Util.h" // #include // using namespace std; using namespace affx; void define_dumppgf_options(PgOptions* opts) { opts->setUsage("apt-dump-pgf - Dump information from a pgf file.\n" "Usage:\n" " apt-dump-pgf -o int.txt -c file.clf -p file.pgf [--probeset-type=type [--probeset-type=...]]\n\n" " apt-dump-pgf -o int.txt -c file.clf -p file.pgf [--probeset-ids=file [--probeset-ids=...]]\n\n" " apt-dump-pgf -o int.txt -c file.clf -p file.pgf [--probe-ids=file [--probe-ids=...]]" ); opts->defineOption("p", "pgf-file", PgOpt::STRING_OPT, "The pgf file used to dump information.", ""); opts->defineOption("c", "clf-file", PgOpt::STRING_OPT, "Optional clf file to use. When present, " "probe position will be included in the output.", ""); opts->defineOption("", "probeset-only", PgOpt::BOOL_OPT, "Dump only probeset level information.", "false"); opts->defOptMult("", "probeset-type", PgOpt::STRING_OPT, "Optional probeset type to extract; " "can be specified multiple times. When specified " "multiple times, the intersection of all types " "is taken. The user cannot mix use of probeset-type, " "probeset-ids, and probe-ids.", ""); opts->defineOption("s", "probeset-ids", PgOpt::STRING_OPT, "Optional name of a file containing probeset ids " "to extract; can be specified multiple times. The " "user cannot mix use of probeset-type, probeset-ids, " "and probe-ids.", ""); opts->defineOption("", "probe-ids", PgOpt::STRING_OPT, "Optional name of a file containing probe ids " "to extract; can be specified multiple times. 
The " "user cannot mix use of probeset-type, probeset-ids, " "and probe-ids.", ""); opts->defineOption("", "or", PgOpt::BOOL_OPT, "Use the union of the types requested, not the intersection.", "false"); opts->defineOption("o", "out-file", PgOpt::STRING_OPT, "Output file to contain the dump output.", ""); opts->defineOption("", "version", PgOpt::BOOL_OPT, "Display version information.", "false"); opts->defineOption("h", "help", PgOpt::BOOL_OPT, "Print help message.", "false"); } /** * @brief Constructor. * * @param argc Number of command line arguments. * @param argv Command line arguments. * @param version Version string. * * Errors: throw exception to display help messages, if unsupported * option choices were made. */ dumpPgf::dumpPgf (const char* argv[], const std::string& version) : m_Version (version), m_GetProbeCoordinates (0) { // Prefer throw() to exit(). Err::setThrowStatus (true); // //m_Opts = new PgOptions (); define_dumppgf_options(&m_Opts); m_Opts.parseArgv(argv); // The command line is copied to output. m_CommandLine=m_Opts.commandLine(); // Optionally display usage message. if (m_Opts.getBool("help") || (m_Opts.argc() <= 1)) { m_Opts.usage(); cout << "version: " << version << "\n"; exit(0); } // Optionally display version. if (m_Opts.getBool("version")) { cout << "version: " << version << "\n"; exit(0); } // Require pgf file. m_PgfFileName = m_Opts.get("pgf-file"); if (m_PgfFileName.empty()) { string msg = "Must provide pgf file."; Err::errAbort (msg); } // Optional clf file. m_ClfFileName = m_Opts.get("clf-file"); PgOpt* opt; // Save optional types. opt = m_Opts.mustFindOpt("probeset-type"); opt->push_user_values_into(m_ProbesetTypes); // Save optional probeset opt = m_Opts.mustFindOpt("probeset-ids"); opt->push_user_values_into(m_ProbesetIdFileNames); // and probe id file names. opt = m_Opts.mustFindOpt("probe-ids"); opt->push_user_values_into(m_ProbeIdFileNames); // Allow only one of probeset-type, probeset-ids, or probe-ids options. 
int optionsChosen = 0; optionsChosen += m_ProbesetTypes.empty() ? 0 : 1; optionsChosen += m_ProbesetIdFileNames.empty() ? 0 : 1; optionsChosen += m_ProbeIdFileNames.empty() ? 0 : 1; if (optionsChosen > 1) { string msg = "Cannot mix use of --probeset-ids, --probe-ids, "; msg += "and --probeset-type."; Err::errAbort (msg); } // Save optional probeset-only, or flags. m_DumpProbesetsOnly = m_Opts.getBool("probeset-only"); m_DumpUnion = m_Opts.getBool("or"); // Probeset-only is incompatible with a probe-ids list. if (m_DumpProbesetsOnly && !m_ProbeIdFileNames.empty()) Err::errAbort ("Cannot use --probeset-only with --probe-ids."); // An output file is required: abort when --out-file is empty, otherwise open it for writing. m_Outfile = m_Opts.get("out-file"); if (m_Outfile.empty()) Err::errAbort ("Must provide an output file, --out-file option."); else { Fs::mustOpenToWrite(m_FileOut, m_Outfile); m_Out = &m_FileOut; } } /** * @brief Read, process data, write output. * * Runs the steps in order: beginOutput, readIdFiles, openInputFiles, * writeOutputHeader, writeMatches. * * Errors: abort if unable to read any input file. */ void dumpPgf::run() { // Write initial output. beginOutput(); // Read optional probeset or probe id files. readIdFiles(); // Open input files. openInputFiles(); // Write output header. writeOutputHeader(); // Write lines matching selection criteria. writeMatches(); } /** * @brief Read optional probeset or probe id files. * * Each probeset id file must have a probeset_id column and each probe id file * a probe_id column. Ids are appended to m_ProbesetIds / m_ProbeIds in the * order read, skipping ids already seen. * * Errors: abort if an id file could not be opened. */ void dumpPgf::readIdFiles() { // Probeset id file(s). /* NOTE(review): 'map' has no template arguments here (probably lost in extraction); the insert below suggests int keys and bool values -- confirm against upstream. */ map probesetIdsMap; for (unsigned int i = 0; i < m_ProbesetIdFileNames.size(); ++i) { string& probesetIdFileName = m_ProbesetIdFileNames[i]; TsvFile tsv; int probesetId; tsv.bind (0, "probeset_id", &probesetId, TSV_BIND_REQUIRED); if (tsv.open (probesetIdFileName) != TSV_OK) Err::errAbort ("Problem opening probeset id file " + probesetIdFileName); while (tsv.nextLevel (0) == TSV_OK) // Ignore duplicate ids.
if (probesetIdsMap.insert (make_pair (probesetId, true)) .second) m_ProbesetIds.push_back (probesetId); tsv.close(); } Verbose::out(1,"Found " + ToStr(m_ProbesetIds.size()) + " probesets in probeset list files."); // Probe id file(s). map probeIdsMap; for (unsigned int i = 0; i < m_ProbeIdFileNames.size(); ++i) { string& probeIdFileName = m_ProbeIdFileNames[i]; TsvFile tsv; int probeId; tsv.bind (0, "probe_id", &probeId, TSV_BIND_REQUIRED); if (tsv.open (probeIdFileName) != TSV_OK) Err::errAbort ("Problem opening probe id file " + probeIdFileName); while (tsv.nextLevel (0) == TSV_OK) // Ignore duplicate ids. if (probeIdsMap.insert (make_pair (probeId, true)) .second) m_ProbeIds.push_back (probeId); tsv.close(); } Verbose::out(1,"Found " + ToStr(m_ProbeIds.size()) + " probes in probe list files."); } /** * @brief Begin output: log the module version and command line, then * generate and log the execution guid (kept in m_ExecGuid). */ void dumpPgf::beginOutput() { Verbose::out(1,"MODULE: " + m_Version); Verbose::out(1,"CMD: " + m_CommandLine); m_ExecGuid = affxutil::Guid::GenerateNewGuid(); Verbose::out(1,"exec_guid " + m_ExecGuid); } /** * @brief Open input pgf, clf files. * * Also defines the indexes needed for id lookups, collects the output column * names, binds the pgf columns to the data vectors, and selects the probe * coordinate lookup method when a clf file is used. * * Errors: abort if a file could not be opened or if * a required column name was not found. */ void dumpPgf::openInputFiles() { Verbose::out(1,"Reading meta data from PGF and CLF files"); // Open pgf file. m_PgfTsv.bind (0, "probeset_id", &m_PgfProbesetId, TSV_BIND_REQUIRED); if (! m_DumpProbesetsOnly) m_PgfTsv.bind (2, "probe_id", &m_PgfProbeId, TSV_BIND_REQUIRED); if (m_PgfTsv.open (m_PgfFileName) != TSV_OK) Err::errAbort ("Problem opening file " + m_PgfFileName + "."); // If a clf file name was provided, open it. if (! m_ClfFileName.empty()) { if (m_ClfTsv.open (m_ClfFileName) != TSV_OK) Err::errAbort ("Problem opening file " + m_ClfFileName + "."); if (m_SequentialClf.isSequential (m_ClfTsv)) // If sequential, calculate coordinates by simple arithmetic. m_GetProbeCoordinates = &dumpPgf::getCoordsBySequentialClf; else { // Not sequential, use index to find coordinates.
// Require probe_id, x, y columns. m_ClfTsv.bind (0, "probe_id", &m_ClfProbeId, TSV_BIND_REQUIRED); m_ClfTsv.bind (0, "x", &m_ClfX, TSV_BIND_REQUIRED); m_ClfTsv.bind (0, "y", &m_ClfY, TSV_BIND_REQUIRED); // Will be indexing over probe_id. m_ClfTsv.defineIndex (0, "probe_id", TSV_INDEX_INT, 0); m_GetProbeCoordinates = &dumpPgf::getCoordsByIndex; } } // If probeset-ids file(s) were provided, index over probeset_id. if (! m_ProbesetIdFileNames.empty()) m_PgfTsv.defineIndex (0, "probeset_id", TSV_INDEX_INT, 0); // Likewise for probe ids - already required that the user did not request both. if (! m_ProbeIdFileNames.empty()) m_PgfTsv.defineIndex (2, "probe_id", TSV_INDEX_INT, 0); // Set up output header, bind pgf file data. m_ProbesetDataCount = m_PgfTsv.getColumnCount(0); // Save probeset level data other than the probeset_id (int) // as strings. m_ProbesetData.resize (m_ProbesetDataCount - 1); unsigned int k = 0; bool foundType = false; for (unsigned int i = 0; i < m_ProbesetDataCount; ++i) { string colName; m_PgfTsv.cidx2cname (0, i, colName); m_OutputColNames.push_back (colName); // Probeset_id is an int, type and probeset_name are strings. // To make generating output simpler, require that the // first column is the probeset_id. /* NOTE(review): assert is compiled out when NDEBUG is defined, so this column order requirement is unchecked in such builds. */ assert (! ((i == 0) && (colName != "probeset_id")) ); if (colName != "probeset_id") { if (colName == "type") { /* k is an index into m_ProbesetData, not a pgf column index. */ m_ProbesetTypeCol = k; foundType = true; } m_PgfTsv.bind (0, i, &m_ProbesetData[k++]); } } // Subtract probeset_id column from data count. --m_ProbesetDataCount; // Require type column if user requested --probeset-type. if (! m_ProbesetTypes.empty() && ! foundType) Err::errAbort ("No type column in pgf file"); // Bind atom, probe level data unless --probeset-only selected. if (m_DumpProbesetsOnly) m_AtomDataCount = m_ProbeDataCount = 0; else { m_AtomDataCount = m_PgfTsv.getColumnCount(1); m_ProbeDataCount = m_PgfTsv.getColumnCount(2); // Save atom and probe level data other than the probe_id (int) // as strings.
m_AtomProbeData.resize (m_AtomDataCount + m_ProbeDataCount - 1); k = 0; for (unsigned int i = 0; i < m_AtomDataCount; ++i) { string colName; m_PgfTsv.cidx2cname (1, i, colName); m_OutputColNames.push_back (colName); m_PgfTsv.bind (1, i, &m_AtomProbeData[k++]); } for (unsigned int i = 0; i < m_ProbeDataCount; ++i) { string colName; m_PgfTsv.cidx2cname (2, i, colName); m_OutputColNames.push_back (colName); // Again, to make generating output simpler, require that // the first probe level column is the probe_id. /* NOTE(review): assert is compiled out when NDEBUG is defined, so this requirement is unchecked in such builds. */ assert (! ((i == 0) && (colName != "probe_id")) ); // Already bound probe_id. if (colName != "probe_id") m_PgfTsv.bind (2, i, &m_AtomProbeData[k++]); } // Subtract probe_id column from data count. --m_ProbeDataCount; // If using a clf file, will write x and y columns. if (! m_ClfFileName.empty()) { m_OutputColNames.push_back ("x"); m_OutputColNames.push_back ("y"); } } // end else (! m_DumpProbesetsOnly) } /** * @brief Write output file header: the apt meta tags, the propagated pgf * header tags, then the tab separated column name line. */ void dumpPgf::writeOutputHeader() { // Generic apt meta tags. const string guid = affxutil::Guid::GenerateNewGuid(); *m_Out << "#%guid=" << guid << "\n"; *m_Out << "#%exec_guid=" << m_ExecGuid << "\n"; *m_Out << "#%exec_version=" << m_Version << "\n"; *m_Out << "#%create_date=" << Util::getTimeStamp() << "\n"; *m_Out << "#%cmd=" << m_CommandLine << "\n"; // Copy selected pgf file header meta tags to output. string key, value; m_PgfTsv.headersBegin(); while (m_PgfTsv.headersNext (key, value) == TSV_OK) // Propagate only chip_type, lib_set_version, and lib_set_name. if ( (key == "chip_type") || (key == "lib_set_version") || (key == "lib_set_name") ) *m_Out << "#%" << key << "=" << value << "\n"; // Write header line. unsigned int colNameCount = m_OutputColNames.size(); assert (colNameCount > 0); *m_Out << m_OutputColNames[0]; for (unsigned int i = 1; i < colNameCount; ++i) *m_Out << "\t" << m_OutputColNames[i]; *m_Out << "\n"; } /** * @brief Write lines matching selection criteria. * * Selection is by probeset id list, by probe id list, or by probeset type(s); * when none of these was given the entire pgf file is dumped.
*/ void dumpPgf::writeMatches() { // If using a clf file, need a method for obtaining probe coordinates. if (! m_ClfFileName.empty()) assert (m_GetProbeCoordinates); // Selection by probeset id list: look up each requested id through the probeset_id index. if (! m_ProbesetIdFileNames.empty()) { const unsigned int probesetIdCount = m_ProbesetIds.size(); for (unsigned int i = 0; i < probesetIdCount; ++i) { if(i==0) Verbose::out(1,"Indexing probesets in PGF file"); const int probesetId = m_ProbesetIds[i]; if (m_PgfTsv.findBegin (0, "probeset_id", TSV_OP_EQ, probesetId) != TSV_OK) Err::errAbort ("Problem reading pgf file " + m_PgfFileName); const int resultCount = m_PgfTsv.findResultsCount(); // Skip if no match. if (resultCount == 0) continue; // Fatal error if more than one match was found. else if (resultCount > 1) { string msg = "probeset_id '" + ToStr (probesetId); msg += "' is not a unique index. Duplicate probeset_id found, ["; msg += ToStr (probesetId) + "] for pgf file " + m_PgfFileName; Err::errAbort (msg); } // Found one match - read, write data. if (m_PgfTsv.findNext() != TSV_OK) Err::errAbort ("Problem reading pgf file " + m_PgfFileName); /* Logged only when the first requested id (i==0) was found; a miss has already hit the continue above. */ if(i==0) Verbose::out(1,"Dumping probeset info"); dumpProbesetData(); } } // Selection by probe id list: look up each requested id through the probe_id index. else if (! m_ProbeIdFileNames.empty()) { const unsigned int probeIdCount = m_ProbeIds.size(); for (unsigned int i = 0; i < probeIdCount; ++i) { const int probeId = m_ProbeIds[i]; if(i==0) Verbose::out(1,"Indexing probes in PGF file"); if (m_PgfTsv.findBegin (2, "probe_id", TSV_OP_EQ, probeId) != TSV_OK) Err::errAbort ("Problem reading pgf file " + m_PgfFileName); const int resultCount = m_PgfTsv.findResultsCount(); // Skip if no match. if (resultCount == 0) continue; // Fatal error if more than one match was found. /* NOTE(review): the PGF format notes in PgfFile.cpp say a probe_id is not required to be unique within a pgf file; a requested probe_id that occurs in several probesets aborts here. */ else if (resultCount > 1) { string msg = "probe_id '" + ToStr (probeId); msg += "' is not a unique index. Duplicate probe_id found, ["; msg += ToStr (probeId) + "] for pgf file " + m_PgfFileName; Err::errAbort (msg); } // Found one match - read, write data.
if (m_PgfTsv.findNext() != TSV_OK) Err::errAbort ("Problem reading pgf file " + m_PgfFileName); if(i==0) Verbose::out(1,"Dumping probe info"); dumpProbeData(); } } // Dumping by probeset type. else if (! m_ProbesetTypes.empty()) { Verbose::out(1,"Scanning PGF file probe for requested type(s)"); /* NOTE(review): 'vector' appears without template arguments in this branch (probably lost in extraction); the elements are filled by splitTypes and compared with m_ProbesetTypes entries -- presumably vector of string, confirm against upstream. */ const vector::const_iterator userTypesBegin = m_ProbesetTypes.begin(); const vector::const_iterator userTypesEnd = m_ProbesetTypes.end(); const unsigned int userTypesCount = m_ProbesetTypes.size(); // Types in line read from pgf file. vector pgfFileTypes; // User entered --or: dump the union of the requested types. if (m_DumpUnion) while (m_PgfTsv.nextLevel (0) == TSV_OK) { // Split type by '->'. splitTypes (m_ProbesetData[m_ProbesetTypeCol], pgfFileTypes); // Dump this probeset_id if pgf file type matches any of those specified by the user. const unsigned int pgfFileTypesCount = pgfFileTypes.size(); for (unsigned int i = 0; i < pgfFileTypesCount; ++i) if (find (userTypesBegin, userTypesEnd, pgfFileTypes[i]) != userTypesEnd) { dumpProbesetData(); break; } } // end while (m_PgfTsv.nextLevel (0) == TSV_OK) // Default 'and' - require pgf file types to include all of those specified by the user. else while (m_PgfTsv.nextLevel (0) == TSV_OK) { splitTypes (m_ProbesetData[m_ProbesetTypeCol], pgfFileTypes); const vector::const_iterator pgfFileTypesBegin = pgfFileTypes.begin(); const vector::const_iterator pgfFileTypesEnd = pgfFileTypes.end(); bool noMismatchFound = true; // Dump this probeset_id if type matches all of those specified by the user. for (unsigned int i = 0; i < userTypesCount; ++i) { if (find (pgfFileTypesBegin, pgfFileTypesEnd, m_ProbesetTypes[i]) == pgfFileTypesEnd) { // If one of the user requested types is not present in this pgf file line, // don't write the line. noMismatchFound = false; break; } } if (noMismatchFound) dumpProbesetData(); } // end while (m_PgfTsv.nextLevel (0) == TSV_OK) } // None of the above - dump the entire pgf file.
else { Verbose::out(1,"Dumping entire PGF file"); while (m_PgfTsv.nextLevel (0) == TSV_OK) dumpProbesetData(); } } /** * @brief Split type by '->'. * * @param inputString Type string to split, e.g. control->affx->spike. * @param inputTypes Output vector; cleared first, then filled with the * non-empty pieces of inputString in order. * * Only '->' is treated as a separator; a ':' in the type string is not split. */ void dumpPgf::splitTypes (const std::string& inputString, std::vector& inputTypes) { // Want to reuse the output vector; clear it first. inputTypes.clear(); size_t substrBegin = 0; for (;;) { size_t substrEnd = inputString.find ("->", substrBegin); if (substrEnd == string::npos) { // No more '->' - save what's left, quit. string subString = inputString.substr (substrBegin); // Avoid returning a null string from a terminating '->' or an empty inputString. if (! subString.empty()) inputTypes.push_back (subString); break; } // Avoid null strings from an initial '->' or '->->'. if (substrEnd != substrBegin) inputTypes.push_back (inputString.substr (substrBegin, substrEnd - substrBegin) ); // Continue following the '->' substrBegin = substrEnd + 2; } } /** * @brief Dump information for the current probeset: a single line of * probeset level data when probesets only are dumped, otherwise one line * per probe found under the probeset. */ void dumpPgf::dumpProbesetData() { // If dumping probesets only, write out data. if (m_DumpProbesetsOnly) { *m_Out << m_PgfProbesetId; for (unsigned int j = 0; j < m_ProbesetDataCount; ++j) *m_Out << "\t" << m_ProbesetData[j]; *m_Out << "\n"; return; } // Read, dump atom, probe level data. while (m_PgfTsv.nextLevel (1) == TSV_OK) while (m_PgfTsv.nextLevel (2) == TSV_OK) dumpProbeData(); } /** * @brief Dump one output line for the current probe: probeset id and data, * atom data, probe id, probe data, then x and y if a clf file is in use. */ void dumpPgf::dumpProbeData() { *m_Out << m_PgfProbesetId; for (unsigned int j = 0; j < m_ProbesetDataCount; ++j) *m_Out << "\t" << m_ProbesetData[j]; // Keeping atom, probe level data, excluding probe_id, in // the m_AtomProbeData vector. unsigned int k = 0; for (unsigned int j = 0; j < m_AtomDataCount; ++j) *m_Out << "\t" << m_AtomProbeData[k++]; // Previously required that the probe_id column be // first in the probe level data.
*m_Out << "\t" << m_PgfProbeId; for (unsigned int j = 0; j < m_ProbeDataCount; ++j) *m_Out << "\t" << m_AtomProbeData[k++]; // Dump x, y columns if a clf file is in use. if (! m_ClfFileName.empty()) { (this->*m_GetProbeCoordinates) (m_PgfProbeId); *m_Out << "\t"; // Write an empty x or y field when the coordinate is negative (not defined). if (m_ClfX >= 0) *m_Out << m_ClfX; *m_Out << "\t"; if (m_ClfY >= 0) *m_Out << m_ClfY; } *m_Out << "\n"; } /** * @brief Calculate probe coordinates using SequentialClfFile. * * @param probeId Probe id; the coordinates are stored in m_ClfX, m_ClfY. */ void dumpPgf::getCoordsBySequentialClf (const int probeId) { m_SequentialClf.getProbeCoordinates (probeId, m_ClfX, m_ClfY); } /** * @brief Calculate probe coordinates using index. * * @param probeId Probe id. NOTE(review): currently unused -- the lookup and * the error messages use m_PgfProbeId instead; the caller in dumpProbeData * passes m_PgfProbeId, so the two agree there. * * Sets m_ClfX and m_ClfY to -1 when the probe id is not found in the clf file. * On a single match findNext() presumably fills the bound m_ClfX, m_ClfY -- confirm. * * Errors: abort if the clf file could not be read or the probe id is not unique in it. */ void dumpPgf::getCoordsByIndex (const int probeId) { if (m_ClfTsv.findBegin (0, "probe_id", TSV_OP_EQ, m_PgfProbeId) != TSV_OK) Err::errAbort ("Problem reading clf file " + m_ClfFileName); const int resultCount = m_ClfTsv.findResultsCount(); // Fatal error if more than one match was found. if (resultCount > 1) { string msg = "probe_id '" + ToStr (m_PgfProbeId); msg += "' is not a unique index. Duplicate probe_id found, ["; msg += ToStr (m_PgfProbeId) + "] for clf file " + m_ClfFileName; Err::errAbort (msg); } else if (resultCount == 1) { // Found one match - read x, y. if (m_ClfTsv.findNext() != TSV_OK) Err::errAbort ("Problem reading clf file " + m_ClfFileName); } // Write null x, y coordinates if no match found. else m_ClfX = m_ClfY = -1; } affxparser/src/fusion/file/TsvFile/DumpPgf.h0000644000175200017520000001140514516003651022037 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef DUMP_PGF_H #define DUMP_PGF_H // #include "file/TsvFile/SequentialClfFile.h" #include "file/TsvFile/TsvFile.h" // #include "util/Err.h" #include "util/PgOptions.h" #include "util/Util.h" // #include #include #include #include #include /* NOTE(review): the five #include directives above have no header names, and std::vector is used below without template arguments (both likely lost together with their angle brackets); restore them from the upstream DumpPgf.h. */ // /** * @file DumpPgf.h * @brief Headers for DumpPgf.cpp. */ class dumpPgf { public: /** Constructor. * @param argv Command line arguments (there is no argc parameter). * @param version Version string. */ dumpPgf (const char* argv[], const std::string& version); /** Destructor. */ ~dumpPgf() { clear(); } /** Read, process data, write output. */ void run(); private: /** Clear data. */ void clear() { //delete m_Opts; } /** Read optional probeset id and probe id files. */ void readIdFiles (void); /** Begin output. */ void beginOutput (void); /** Open input files. */ void openInputFiles (void); /** Write output file header. */ void writeOutputHeader (void); /** Write lines matching selection criteria. */ void writeMatches (void); /** Dump information for a given probeset id. */ void dumpProbesetData (void); /** Dump information for a given probe id. */ void dumpProbeData (void); /** Split type by '->'. */ void splitTypes (const std::string& inputString, std::vector& inputTypes); /** Calculate probe coordinates using SequentialClfFile. * * @param probeId Probe id. */ void getCoordsBySequentialClf (const int probeId); /** Calculate probe coordinates using index.
* * @param probeId Probe id. */ void getCoordsByIndex (const int probeId); /// private data /// Version string. NOTE(review): held as a reference bound to the constructor argument, so the caller's string must outlive this object. const std::string& m_Version; /// Command line options. public: PgOptions m_Opts; private: /// Command line as a string. std::string m_CommandLine; /// Pgf file name. std::string m_PgfFileName; /// Clf file name. std::string m_ClfFileName; /// Probeset type(s). std::vector m_ProbesetTypes; /// Probeset id file name(s). std::vector m_ProbesetIdFileNames; /// Probe id file name(s). std::vector m_ProbeIdFileNames; /// Should probeset level information only be dumped? bool m_DumpProbesetsOnly; /// Should the union of the types requested be dumped? bool m_DumpUnion; /// Pgf file tsv object. affx::TsvFile m_PgfTsv; /// Clf file tsv object. affx::TsvFile m_ClfTsv; /// Sequential clf file object. SequentialClfFile m_SequentialClf; /// Column names for output. std::vector m_OutputColNames; /// Number of probeset (level 0) data columns. unsigned int m_ProbesetDataCount; /// Number of atom (level 1) data columns. unsigned int m_AtomDataCount; /// Number of probe (level 2) data columns. unsigned int m_ProbeDataCount; /// Probeset (level 0) data, excluding probeset_id. std::vector m_ProbesetData; /// Atom and probe (levels 1 and 2) data, excluding probe_id. std::vector m_AtomProbeData; /// Execution guid. std::string m_ExecGuid; /// Probeset ids requested. std::vector m_ProbesetIds; /// Probe ids requested. std::vector m_ProbeIds; /// Pgf file probeset_id. int m_PgfProbesetId; /// Pgf file probe_id. int m_PgfProbeId; /// Index into m_ProbesetData of the probeset type column; assigned only when the pgf file has a type column. unsigned int m_ProbesetTypeCol; /// Clf file probe_id. int m_ClfProbeId; /// Clf file x value. int m_ClfX; /// Clf file y value. int m_ClfY; /// Output file name. std::string m_Outfile; /// Output stream. std::ostream* m_Out; /// File output stream. std::ofstream m_FileOut; // Follow Stroustrup third edition section 15.5 'pointers to members'.
typedef void (dumpPgf::* PMemProbeCoord_t) (const int probeId); // Pointer to the member function used to determine the probe coordinates; the constructor initializes it to 0 and openInputFiles() sets it when a clf file is used. PMemProbeCoord_t m_GetProbeCoordinates; }; #endif /* DUMP_PGF_H */ affxparser/src/fusion/file/TsvFile/PgfFile.cpp0000644000175200017520000003527714516003651022361 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * \file PgfFile.cpp * \brief * Read \link file-format-pgf the PgfFile format \endlink for an overview. * Read \link file-format-tsv the TsvFile docs \endlink more about TsvFile. */ /** \page file-format-pgf File Format: PGF (NON-OFFICIAL-RELEASE)
DRAFT
The PGF (probe group file) provides information about what probes are contained within a probeset and information about the nature of the probes necessary for analysis. The current PGF file format (version 1) is only specified for expression style probesets. The PGF file is based on version 2 of the \link file-format-tsv TSV file format \endlink. \section specifications Specifications - Required headers - chip_type: indicates the chip type (as stored in the CEL file) which the pgf file describes. Multiple chip_type headers may be present indicating the pgf file can be used with any one of the listed chip types. - lib_set_name: indicates the name of a collection of related library files for a given chip. For example, PGF and CLF files intended to be used together should have the same lib_set_name. Only a single lib_set_name is allowed. - lib_set_version: indicates the version of a collection of related library files for a given chip. For example, PGF and CLF files intended to be used together should have the same lib_set_version. Only a single lib_set_version is allowed. - pgf_format_version: currently the only documented and supported version is 1.0. - header0: indicates the values in the pgf file for the top level data (probeset level). A pgf file probeset level entry must contain probeset_id. This field must be unique over all the probeset_ids in the PGF file. Optional, but typical fields include type and probeset_name. - header1: the second level of data in the PGF file (atom level). An atom refers to a particular collection of probes that are interrogating the same position. For expression arrays an atom is usually a probe pair (pm and mm probe pair) from an array like HG-U133 Plus 2.0 or a single pm probe from an array like Human Exon 1.0 ST which does not contain mismatch probes. The atom level data must have an atom_id field. atom_ids are expected to be unique within the PGF file. 
In other words, atoms cannot be present in more than one probeset or present multiple times in any given probeset. - header2: the third level of data (probe level) contains information about a particular probe in a particular probeset. A probe may be present in more than one probeset, but it may only appear once in a given probeset. Required fields include probe_id and type. Optional, but typical fields include gc_count, probe_length, interrogation_position, and probe_sequence. A probe_id is not required to be unique within the PGF file -- i.e. that probe may be used in another probeset. - Optional headers - create_date: timestamp for when the file was created - guid: a unique identifier for the file - other headers may be present - Columns - per TSV format, order of columns is not guaranteed - per TSV format, additional columns may be present - Probeset Level (level 0) - probeset_id (required): an integer id >= 0 which is unique over all the probeset_ids in the pgf file - type (optional): provides classification information for the probeset. See Type section. - probeset_name (optional) - Atom Level (level 1) - atom_id (required): an integer id >= 0 which is unique over all the atom_ids in the pgf file - type (optional): provides classification information for the atom. See Type section. - exon_position (optional): the position of the probe interrogation position relative to the target sequence - Probe Level (level 2) - probe_id (required): an integer id >= 0 which is a foreign key into the CLF file; a specific probe may be present in more than one probeset and as such is not guaranteed to be unique in the PGF file. Also note that the additional columns of information at the probe level may be context dependent. So for example a particular probe could potentially be a PM probe in one probeset and an MM probe in another. While unlikely, this is a possibility. - type (required): provides classification information for the probe. See the Type section.
- gc_count (optional): the number of G and C bases in the probe - probe_length (optional): the length of the probe - interrogation_position (optional): the interrogation position of the probe (typically 13 for 25-mer PM/MM probes) - probe_sequence (optional): the sequence of the probe on the array, in the direction from the array surface to solution. For most standard Affymetrix arrays at this writing, this would be in a 3' to 5' direction. So for a sense target (st) probe (see the type field for the probe level) you would need to complement the sequence in this field before looking for matches to transcript sequences; for an antisense target (at) you would need to reverse this sequence. \section types Types Type columns in PGF files use the following string format to categorize probesets, atoms, and probes: \verbatim simple_type:=[a-z0-9\_\-]+ \endverbatim Examples of simple types: \verbatim pm mm st at control affx spike \endverbatim Furthermore, types can be nested. For example a particular spike may be from Affymetrix and is intended for use as a control. As a result you would combine the simple types to reflect this: \verbatim control->affx->spike \endverbatim Thus \verbatim nested_type:=(simple_type|nested_type)->(simple_type) \endverbatim Lastly, a given probeset, atom, or probe may belong to multiple independent types. For example, a probeset may be both a normalization control gene and part of the main design: \verbatim normgene->exon:main \endverbatim Thus \verbatim compound_type:=(simple_type|nested_type|compound_type):(simple_type|nested_type) \endverbatim Currently type values are not strongly enumerated.
Values used in current commercial PGF files include: - Probeset Type: - main: probesets which are a part of the main design for which the array was designed - normgene->exon: probe sets against exon regions of a set of housekeeping genes - normgene->intron: probe sets against intron regions of a set of housekeeping genes - control->affx: standard Affymetrix spike control probeset (i.e. bacterial and polyA spikes) - control->bgp->antigenomic: antigenomic background probes - control->bgp->genomic: genomic background probes - control->spike->arabidopsis: arabidopsis spike control probesets - rescue->FLmRNA->unmapped: probesets against mRNA sequences which did not align to the genome - types removed in more recent PGF files: - control->QC: manufacturing control probesets - control->chip: internal control probesets used for gridding and other internal functions - Atom Type - (none) - Probe Type - pm: perfect match probe relative to intended target - mm: specific mismatch probe - st: a probe designed for sense target - at: a probe designed for antisense target - types removed in more recent PGF files: - blank: a blank feature - generic: an internal control feature - jumbo-checkerboard: an internal gridding feature - thermo: an internal control feature - trigrid: an internal gridding feature \section implementation Parsing and Writing The official C++ parser used by affy can be found in APT under sdk/file/TsvFile/PgfFile.h. When possible, parsing and writing of PGF files should be done using this code. \section notes Notes Specific applications may require extra/optional columns in the PGF file. Thus a valid PGF file may fail for a particular application or analysis algorithm because the information needed by that application and/or algorithm is not contained in the PGF file. It should be noted that there is no significance to the ordering of probes within atoms and atoms within probesets or even probesets within the PGF file.
IDs do not have to be unique between different levels. In other words, the ID space for probeset_ids is separate from the ID space for atom_ids and probe_ids. \section pgfExample1 Example 1 -- Human Exon 1.0 ST PGF File Excerpt \verbatim #%chip_type=HuEx-1_0-st-v2 #%chip_type=HuEx-1_0-st-v1 #%chip_type=HuEx-1_0-st-ta1 #%lib_set_name=HuEx-1_0-st #%lib_set_version=r2 #%create_date=Tue Sep 19 15:18:05 PDT 2006 #%guid=0000008635-1158704285-0183259307-0389325148-0127012107 #%pgf_format_version=1.0 #%header0=probeset_id type #%header1= atom_id #%header2= probe_id type gc_count probe_length interrogation_position probe_sequence 2590411 main 1 5402769 pm:st 12 25 13 CGAAGTTGTTCATTTCCCCGAAGAC 2 4684894 pm:st 13 25 13 ATGAGGTCACGACGGTAGGACTAAC 3 3869021 pm:st 11 25 13 AGGAGTACAGGGTAAGATATGGTCT 4 3774604 pm:st 14 25 13 CCCCGAAGACCCTAAGATGAGGTCA ... \endverbatim \section pgfExample2 Example 2 -- Human Genome U133 2.0 Plus PGF File Excerpt Here is a hypothetical example of a PGF file for an expression array with PM/MM pairs. 
\verbatim #%pgf_format_version=1.0 #%chip_type=HG-U133_Plus_2 #%lib_set_name=HG-U133_Plus_2 #%lib_set_version=1 #%create_date=Tue Mar 29 16:48:05 2005 #%header0=probeset_id type probeset_name #%header1= atom_id #%header2= probe_id type gc_count probe_length interrogation_position probe_sequence exon_position 1354897 204339_s_at 1354898 1221821 pm:target->at 13 25 13 ACAACGACCGTTCCGGAATCGACAT 1703 1222985 mm:target->at 13 25 13 ACAACGACCGTTGCGGAATCGACAT 1703 1354899 788881 pm:target->at 8 25 13 TTACATCATACTTTCTTGTCTCTAG 1355 790045 mm:target->at 8 25 13 TTACATCATACTATCTTGTCTCTAG 1355 1354900 516645 pm:target->at 12 25 13 GAATCTTATCACGAGTTCCACCTCC 1518 517809 mm:target->at 12 25 13 GAATCTTATCACCAGTTCCACCTCC 1518 1354901 736948 pm:target->at 12 25 13 GAGTTCCCTCGATTCCATAAACGAG 1543 \endverbatim \section related Related Pages - TsvFile Design Notes - TSV File Format \section pgf_dev_notes Developer Notes - If you are defining a file format which is based on TsvFile, use this as an example. - PgfFile is not subclassed as we dont want it to inherit methods from TsvFile. This prevents calling a method by accident. - m_tsv is a member so it appears when a PgfFile is made. (ie: minus messy memory management) - We make "tsv" public so if the programmer wants to work with the underlying tsv object may do so. */ #include "file/TsvFile/PgfFile.h" /// This defines an object which has fields and some methods /// to make working with PGF files a snap. Basicly we define /// user friendly method which map to TsvFile mothods. 
namespace affx { /** @brief Create a PgfFile: reset the integer fields to -1 and the string fields to "", then make the standard bindings. */ PgfFile::PgfFile() { probeset_id = -1; probeset_type = ""; probeset_name = ""; atom_id = -1; probe_id = -1; probe_type = ""; gc_count = -1; probe_length = -1; interrogation_position = -1; probe_sequence = ""; makePgfBindings(); } /// @brief Destroy a PgfFile object PgfFile::~PgfFile() { } /// @brief Make the normal bindings to the standard PGF slots void PgfFile::makePgfBindings() { // start afresh m_tsv.unbindAll(); //#%header0= m_tsv.bind(0, "probeset_id", &probeset_id, TSV_BIND_REQUIRED); m_tsv.bind(0, "type", &probeset_type); m_tsv.bind(0, "probeset_name", &probeset_name); //#%header1= m_tsv.bind(1, "atom_id", &atom_id, TSV_BIND_REQUIRED); //#%header2= m_tsv.bind(2, "probe_id", &probe_id, TSV_BIND_REQUIRED); m_tsv.bind(2, "type", &probe_type); m_tsv.bind(2, "gc_count", &gc_count); m_tsv.bind(2, "probe_length", &probe_length); m_tsv.bind(2, "interrogation_position", &interrogation_position); m_tsv.bind(2, "probe_sequence", &probe_sequence); // this seems to be unused... // m_tsv.bind(2,"exon_position",&exon_position); } /// @brief Open a PGF file and check its pgf_format_version header /// @param filename the pathname of the file /// @return tsv_return_t from opening the file; when it is TSV_OK, Err::check also requires a pgf_format_version header equal to "1.0" int PgfFile::open(const std::string& filename) { std::string val; int rc = m_tsv.open(filename); if (rc == TSV_OK) { int rv = m_tsv.getHeader("pgf_format_version", val); Err::check(rv == TSV_OK, "No pgf_format_version header in pgf file"); Err::check(val.compare("1.0") == 0, "Unable to handle pgf file format"); } return rc; } /// @brief Start the write of a PGF file.
/// @param filename the pathname of the file to write /// @return tsv_return_t int PgfFile::write(const std::string& filename) { return m_tsv.writeTsv(filename); } /// @brief Close a PGF file /// @return tsv_return_t int PgfFile::close() { return m_tsv.close(); } /// @brief Rewind back to the start of the PGF data /// @return tsv_return_t int PgfFile::rewind() { return m_tsv.rewind(); } /// @brief Skip to the next probeset /// @return TSV_LEVEL_LAST when out of probesets int PgfFile::next_probeset() { return m_tsv.nextLevel(0); } /// @brief Skip to the next atom, but not beyond /// @return TSV_LEVEL_LAST when out of atoms int PgfFile::next_atom() { return m_tsv.nextLevel(1); } /// @brief Skip to the next probe, but not beyond /// @return TSV_LEVEL_LAST when out of probes int PgfFile::next_probe() { return m_tsv.nextLevel(2); } /// @brief Define a tsv file as a PGF file. /// @param tsv pointer to tsv to define /// @return tsv_return_t int PgfFile::defineFilePgf(TsvFile* tsv) { Err::check(tsv != NULL, "affx::PgfFile - no tsv file provided to defineFilePgf"); /* NOTE(review): exon_position is defined below at the probe level (third line of the definition); the format notes in this file list it under the atom level, and makePgfBindings() leaves it unbound. */ // note embedded "\t"s and "\n"s. return tsv->defineFile("probeset_id\ttype\tprobeset_name\n" "atom_id\n" "probe_id\ttype\tgc_count\tprobe_length\t" // line break for length. "interrogation_position\tprobe_sequence\texon_position\n"); } /// @brief Define this file as a pgf file /// @return tsv_return_t int PgfFile::defineFilePgf() { return defineFilePgf(&m_tsv); } /// @brief Get a ptr for SWIG /// @return a pointer to the internal TSV file object affx::TsvFile* PgfFile::tsv_ptr() { return &m_tsv; } }; affxparser/src/fusion/file/TsvFile/PgfFile.h0000644000175200017520000000717314516003651022020 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation.
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * \file PgfFile.h * \brief Headers for the PgfFile class. * Read \link file-format-pgf the PgfFile format \endlink for an overview. * Read \link file-format-tsv the TsvFile docs \endlink more about TsvFile. */ #ifndef _PGF_FILE_H #define _PGF_FILE_H #include "file/TsvFile/TsvFile.h" // #include "util/Err.h" // namespace affx { class PgfFile; }; /// This defines an object which has fields and some methods /// to make working with PGF files a snap. Basicly we define /// user friendly method which map to TsvFile mothods. class affx::PgfFile { public: #ifndef SWIG /// If you want to work with the underling tsv: "pgf.m_tsv.method()" affx::TsvFile m_tsv; #endif // There isnt a "m_" prefix on these, as I want to seperate them out // from the "real" member variables. This will make it // easy for people to write "pgf.atom_id". // (No annoying prefix.) 
//#%header0= int probeset_id; ///< pgf std::string probeset_type; ///< pgf std::string probeset_name; ///< pgf //#%header1= int atom_id; ///< pgf //#%header2= int probe_id; ///< pgf std::string probe_type; ///< pgf int gc_count; ///< pgf int probe_length; ///< pgf int interrogation_position; ///< pgf std::string probe_sequence; ///< pgf int exon_position; ///< pgf /// @brief Create the PgfFile object PgfFile(); /// @brief Destroy a PgfFile object ~PgfFile(); /// @brief Make the normal bindings to the standard PGF slots void makePgfBindings(); /// @brief Open a PGF file /// @param filename the pathname of the file /// @return tsv_return_t int open(const std::string& filename); /// @brief start the write of a PGF file. /// @param filename /// @return tsv_return_t int write(const std::string& filename); /// @brief Close a PGF file /// @return tsv_return_t int close(); /// @brief Rewind back to the start of the PGF data /// @return tsv_return_t int rewind(); /// @brief Skip to the next probeset /// @return TSV_LEVEL_LAST when out of probesets int next_probeset(); /// @brief Skip to the next atom, but not beyond /// @return TSV_LEVEL_LAST when out of atoms int next_atom(); /// @brief Skip to the next probe, but not beyond /// @return TSV_LEVEL_LAST when out of probes int next_probe(); /// @brief Define a tsv file as a PGF file. /// @param tsv pointer to tsv to define /// @return tsv_return_t int defineFilePgf(TsvFile* tsv); /// @brief Define this file as a pgf file /// @return tsv_return_t int defineFilePgf(); /// @brief Get a ptr for SWIG /// @return a pointer to the internal TSV file object affx::TsvFile* tsv_ptr(); }; #endif // TsvFile affxparser/src/fusion/file/TsvFile/SequentialClfFile.cpp0000644000175200017520000000404514516003651024371 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/TsvFile/SequentialClfFile.h" // #include // /** * Constructor. */ SequentialClfFile::SequentialClfFile(void) : m_Rows(0), m_Cols(0) {} /** * Check whether a clf file is sequential. * * @param clfTsv Clf file tsv object. * @return bool Returns true if the file is sequential, else false. */ bool SequentialClfFile::isSequential(affx::TsvFile& clfTsv) { std::string rowsValue, colsValue, sequentialValue; clfTsv.headersFindNext("rows", rowsValue); clfTsv.headersFindNext("cols", colsValue); clfTsv.headersFindNext("sequential", sequentialValue); // Require rows, cols, and a sequential value of 1. if (rowsValue.empty() || colsValue.empty() || sequentialValue != "1") return false; // Save rows, cols. m_Rows = Convert::toInt(rowsValue); m_Cols = Convert::toInt(colsValue); return true; } /** * Calculate the coordinates of a probe, given its id. * * @param probeId Probe id. * @param x Calculated x value. * @param y Calculated y value. */ void SequentialClfFile::getProbeCoordinates(const int probeId, int& x, int& y) { // Probe id is one based, x and y are zero based. 
const int id = probeId - 1; x = id % m_Rows; assert(m_Cols > 0); y = id / m_Cols; } affxparser/src/fusion/file/TsvFile/SequentialClfFile.h0000644000175200017520000000353714516003651024043 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SequentialClfFile.h * @brief Class for coordinate calculation from a sequential clf file. */ #ifndef SEQUENTIAL_CLF_FILE_H #define SEQUENTIAL_CLF_FILE_H #include "file/TsvFile/TsvFile.h" // #include "util/Convert.h" // #include // /** * Class for calculating probe coordinates using a sequential * clf file. */ class SequentialClfFile { public: /** * Constructor. */ SequentialClfFile (void); /** * Check whether a clf file is sequential. * * @param clfTsv Clf file tsv object. * @return bool Returns true if the file is sequential, else false. */ bool isSequential (affx::TsvFile& clfTsv); /** * Calculate the coordinates of a probe, given its id. * * @param probeId Probe id. * @param x Calculated x value. * @param y Calculated y value. */ void getProbeCoordinates (const int probeId, int& x, int& y); private: /// Number of rows. unsigned int m_Rows; /// Number of columns. 
unsigned int m_Cols; }; #endif /* SEQUENTIAL_CLF_FILE_H */ affxparser/src/fusion/file/TsvFile/SnpTable.cpp0000644000175200017520000000752414516003651022547 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SnpTable.cpp * @author Chuck Sugnet * @date Thu Feb 9 16:29:28 2006 * * @brief Big table of genotype data. Rows are snp probesets, columns are * experiments. Entries are -1 = NN, 0 = AA, 1 = AB, 2 = BB */ #include "file/TsvFile/SnpTable.h" using namespace affx; /** * Read in genotypes from a file that looks like * probeset_id [tab] chip1 [tab] chip2 [tab]... chipN * SNP_A-1889420 [tab] -1 [tab] 0 [tab] 2 [tab] ... 1 * * Entries are -1 = NN, 0 = AA, 1 = AB, 2 = BB * @param fileName - name of file to open. */ void SnpTable::open(const std::string& fileName) { affx::TsvFile tsv; std::string snpName; std::vector gTypes; tsv.open(fileName); int colCount = tsv.getColumnCount(0); tsv.bind(0, "probeset_id", &snpName); gTypes.resize(colCount - 1); /* Bind up the variables. 
*/ for (int i = 1; i < colCount; i++) { std::string s; tsv.cidx2cname(0, i, s); m_ColNames.push_back(s); tsv.bind(0, i, &gTypes[i-1]); m_ColNameMap[s] = i - 1; } int count = 0; while (tsv.nextLevel(0) == TSV_OK) { m_RowNames.push_back(snpName); m_GTypes.push_back(gTypes); m_RowNameMap[snpName] = count++; } tsv.close(); } /** @brief Return the number of rows in the file * @return rows */ int SnpTable::getNumRows() { return (int)m_GTypes.size(); } /** @brief Return the number of columns in the file. * @return columns */ int SnpTable::getNumCols() { return m_GTypes.empty() ? 0 : (int)m_GTypes[0].size(); } /** @brief Get the name of row rowIx * @param rowIx * @return the name */ const std::string &SnpTable::getRowName(int rowIx) { assert(rowIx < (int)m_RowNames.size()); return m_RowNames[rowIx]; } /** @brief Get the name of column colIx * @param colIx * @return the name */ const std::string &SnpTable::getColName(int colIx) { assert(colIx < (int)m_ColNames.size()); return m_ColNames[colIx]; } /** * Find the row index associated with a particular identified, -1 if * not found. * * @param s - row identifier. * @return - index if found, -1 otherwise. */ int SnpTable::getRowIndex(const std::string &s) { std::map::iterator iter = m_RowNameMap.find(s); if (m_RowNameMap.end() == iter) return -1; else return iter->second; } /** * Find the col index associated with a particular identified, -1 if * not found. * * @param s - col identifier. * @return - index if found, -1 otherwise. */ int SnpTable::getColIndex(const std::string &s) { std::map::iterator iter = m_ColNameMap.find(s); if (m_ColNameMap.end() == iter) return -1; else return iter->second; } /** * Get the genotype for the particular row and index of the genotype matrix. * * @param rowIndex - Row of interest. * @param colIndex - Column of interest. * @return the genotype requested. 
*/ int SnpTable::getGenotypeForSnp(int rowIndex, int colIndex) { assert(rowIndex < (int)m_GTypes.size()); assert(colIndex < (int)m_GTypes[0].size()); return m_GTypes[rowIndex][colIndex]; } affxparser/src/fusion/file/TsvFile/SnpTable.h0000644000175200017520000000672414516003651022215 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SnpTable.h * @author Chuck Sugnet * @date Thu Feb 9 16:29:28 2006 * * @brief Big table of genotype data. Rows are snp probesets, columns are * experiments. Entries are -1 = NN, 0 = AA, 1 = AB, 2 = BB */ #ifndef SNPTABLE_H #define SNPTABLE_H #include "file/TsvFile/TsvFile.h" // #include // using namespace affx; class SnpTable { public: const static int NN = -1; const static int AA = 0; const static int AB = 1; const static int BB = 2; /** * Read in genotypes from a file that looks like * probeset_id [tab] chip1 [tab] chip2 [tab]... chipN * SNP_A-1889420 [tab] -1 [tab] 0 [tab] 2 [tab] ... 1 * * Entries are -1 = NN, 0 = AA, 1 = AB, 2 = BB * @param fileName - name of file to open. 
*/ void open(const std::string& fileName); /** @brief Return the number of rows in the file * @return rows */ int getNumRows(); /** @brief Return the number of columns in the file. * @return columns */ int getNumCols(); /** @brief Get the name of row rowIx * @param rowIx * @return the name */ const std::string &getRowName(int rowIx); /** @brief Get the name of column colIx * @param colIx * @return the name */ const std::string &getColName(int colIx); /** * Find the row index associated with a particular identified, -1 if * not found. * * @param s - row identifier. * @return - index if found, -1 otherwise. */ int getRowIndex(const std::string &s); /** * Find the col index associated with a particular identified, -1 if * not found. * * @param s - col identifier. * @return - index if found, -1 otherwise. */ int getColIndex(const std::string &s); /** * Get the genotype for the particular row and index of the genotype matrix. * * @param rowIndex - Row of interest. * @param colIndex - Column of interest. * @return the genotype requested. */ int getGenotypeForSnp(int rowIndex, int colIndex); private: /// Unique identifiers (usually probeset ids) associated with each row. std::vector m_RowNames; /// Unique identifiers (usually chip names) associated with each column. std::vector m_ColNames; /// Map of probeset ids to the index of the row that contains the /// data for that snp std::map m_RowNameMap; /// Map of probeset ids to the index of the column that contains the /// data for that chip std::map m_ColNameMap; /// Matrix of genotypes rows are snps, columns are the chips. std::vector > m_GTypes; }; #endif /* SNPTABLE_H */ affxparser/src/fusion/file/TsvFile/SpfFile.cpp0000644000175200017520000002775214516003651022374 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "file/TsvFile/SpfFile.h" affx::SpfFile::SpfFile() { clearOptions(); clearHeaderInfo(); clearCidx(); } void affx::SpfFile::clearOptions() { m_spf_format=affx::SpfFile::FMT_UNSET; // m_has_allele_info=0; m_has_context_info=0; m_has_channel_info=0; m_has_rep_type_info=0; } void affx::SpfFile::clearHeaderInfo() { m_header_format=-1; m_header_chipTypes.resize(0); m_header_numCols=-1; m_header_numRows=-1; m_header_numProbesets=-1; m_header_numChannels=-1; } void affx::SpfFile::clearCidx() { // all formats v1, v2, v3, v4 m_name_cidx=-1; m_type_cidx=-1; m_num_match_cidx=-1; // format v1 and v2 // all level 0 m_v2_num_blocks_cidx=-1; m_v2_block_sizes_cidx=-1; m_v2_block_annotations_cidx=-1; m_v2_num_probes_cidx=-1; m_v2_probes_cidx=-1; m_v2_block_alleles_cidx=-1; m_v2_block_contexts_cidx=-1; m_v2_block_channels_cidx=-1; m_v2_block_rep_types_cidx=-1; // format v3 // level 1 m_v3_allele_lbl_cidx=-1; m_v3_allele_code_cidx=-1; // level 2 m_v3_context_lbl_cidx=-1; m_v3_context_code_cidx=-1; m_v3_channel_code_cidx=-1; m_v3_rep_type_cidx=-1; m_v3_annotation_code_cidx=-1; // level 3 m_v3_probe_id_cidx=-1; // format v4 // level 1 m_v4_block_lbl_cidx=-1; m_v4_annotation_code_cidx=-1; m_v4_allele_code_cidx=-1; m_v4_context_code_cidx=-1; 
m_v4_channel_code_cidx=-1; m_v4_rep_type_cidx=-1; // level 2 m_v4_probe_id_cidx=-1; } #define DUMP_CIDX_HEAD(_head) { printf("== %-30s\n",_head); } #define DUMP_CIDX(_cidx) { printf(" %-30s = %3d\n",#_cidx,_cidx); } void affx::SpfFile::dumpCidx() { DUMP_CIDX_HEAD("features"); DUMP_CIDX(m_spf_format); DUMP_CIDX(m_has_allele_info); DUMP_CIDX(m_has_context_info); DUMP_CIDX(m_has_channel_info); DUMP_CIDX(m_has_rep_type_info); DUMP_CIDX_HEAD("required"); DUMP_CIDX(m_name_cidx); DUMP_CIDX(m_type_cidx); DUMP_CIDX(m_num_match_cidx); DUMP_CIDX_HEAD("v2"); DUMP_CIDX(m_v2_num_blocks_cidx); DUMP_CIDX(m_v2_block_sizes_cidx); DUMP_CIDX(m_v2_block_annotations_cidx); DUMP_CIDX(m_v2_num_probes_cidx); DUMP_CIDX(m_v2_probes_cidx); DUMP_CIDX(m_v2_block_alleles_cidx); DUMP_CIDX(m_v2_block_contexts_cidx); DUMP_CIDX(m_v2_block_channels_cidx); DUMP_CIDX(m_v2_block_rep_types_cidx); DUMP_CIDX_HEAD("v3"); DUMP_CIDX(m_v3_allele_lbl_cidx); DUMP_CIDX(m_v3_allele_code_cidx); DUMP_CIDX(m_v3_context_lbl_cidx); DUMP_CIDX(m_v3_context_code_cidx); DUMP_CIDX(m_v3_channel_code_cidx); DUMP_CIDX(m_v3_annotation_code_cidx); DUMP_CIDX(m_v3_probe_id_cidx); DUMP_CIDX_HEAD("v4"); DUMP_CIDX(m_v4_block_lbl_cidx); DUMP_CIDX(m_v4_annotation_code_cidx); DUMP_CIDX(m_v4_allele_code_cidx); DUMP_CIDX(m_v4_context_code_cidx); DUMP_CIDX(m_v4_channel_code_cidx); DUMP_CIDX(m_v4_rep_type_cidx); DUMP_CIDX(m_v4_probe_id_cidx); } #undef DUMP_CIDX_HEAD #undef DUMP_CIDX void affx::SpfFile::setFormat(int fmt) { m_spf_format=fmt; } ////////// void affx::SpfFile::define_v2() { clearCidx(); setFormat(2); // int cidx=0; m_name_cidx=cidx++; defineColumn(0,m_name_cidx,"name"); m_type_cidx=cidx++; defineColumn(0,m_type_cidx,"type"); // m_v2_num_blocks_cidx=cidx++; defineColumn(0,m_v2_num_blocks_cidx,"num_blocks"); m_v2_block_sizes_cidx=cidx++; defineColumn(0,m_v2_block_sizes_cidx,"block_sizes"); m_v2_block_annotations_cidx=cidx++; defineColumn(0,m_v2_block_annotations_cidx,"block_annotations"); m_num_match_cidx=cidx++; 
defineColumn(0,m_num_match_cidx,"num_match"); m_v2_num_probes_cidx=cidx++; defineColumn(0,m_v2_num_probes_cidx,"num_probes"); m_v2_probes_cidx=cidx++; defineColumn(0,m_v2_probes_cidx,"probes"); // optional columns if (m_has_allele_info) { m_v2_block_alleles_cidx=cidx++; defineColumn(0,m_v2_block_alleles_cidx,"block_alleles"); } if (m_has_context_info) { m_v2_block_contexts_cidx=cidx++; defineColumn(0,m_v2_block_contexts_cidx,"block_contexts"); } if (m_has_channel_info) { m_v2_block_channels_cidx=cidx++; defineColumn(0,m_v2_block_channels_cidx,"block_channels"); } if (m_has_rep_type_info) { m_v2_block_rep_types_cidx=cidx++; defineColumn(0,m_v2_block_rep_types_cidx,"block_rep_types"); } } void affx::SpfFile::define_v3() { clearCidx(); setFormat(3); // level0 m_name_cidx=0; defineColumn(0,m_name_cidx,"name"); m_type_cidx=1; defineColumn(0,m_type_cidx,"type"); m_num_match_cidx=2; defineColumn(0,m_num_match_cidx,"num_match"); // level1 int cidx1=0; m_v3_allele_lbl_cidx=cidx1++; defineColumn(1,m_v3_allele_lbl_cidx,"allele"); if (m_has_allele_info) { m_v3_allele_code_cidx=cidx1++; defineColumn(1,m_v3_allele_code_cidx,"allele_code"); } // int cidx2=0; m_v3_context_lbl_cidx=cidx2++; defineColumn(2,m_v3_context_lbl_cidx,"context"); // m_v3_annotation_code_cidx=cidx2++; defineColumn(2,m_v3_annotation_code_cidx,"annotation"); // if (m_has_context_info) { m_v3_context_code_cidx=cidx2++; defineColumn(2,m_v3_context_code_cidx,"context_code"); } // if (m_has_channel_info) { m_v3_channel_code_cidx=cidx2++; defineColumn(2,m_v3_channel_code_cidx,"channel_code"); } // level 3 m_v3_probe_id_cidx=0; defineColumn(3,m_v3_probe_id_cidx,"probe_id"); } void affx::SpfFile::define_v4() { clearCidx(); setFormat(4); // level0 m_name_cidx=0; defineColumn(0,m_name_cidx,"name"); m_type_cidx=1; defineColumn(0,m_type_cidx,"type"); m_num_match_cidx=2; defineColumn(0,m_num_match_cidx,"num_match"); // level1 int cidx1=0; m_v4_block_lbl_cidx=cidx1++; defineColumn(1,m_v4_block_lbl_cidx,"block"); // 
m_v4_annotation_code_cidx=cidx1++; defineColumn(1,m_v4_annotation_code_cidx,"annotation"); if (m_has_allele_info==1) { m_v4_allele_code_cidx=cidx1++; defineColumn(1,m_v4_allele_code_cidx,"allele_code"); } if (m_has_context_info==1) { m_v4_context_code_cidx=cidx1++; defineColumn(1,m_v4_context_code_cidx,"context_code"); } if (m_has_channel_info==1) { m_v4_channel_code_cidx=cidx1++; defineColumn(1,m_v4_channel_code_cidx,"channel_code"); } if (m_has_rep_type_info==1) { m_v4_rep_type_cidx=cidx1++; defineColumn(1,m_v4_rep_type_cidx,"rep_type"); } // level 2 m_v4_probe_id_cidx=0; defineColumn(2,m_v4_probe_id_cidx,"probe_id"); } void affx::SpfFile::define_file(int version) { if ((version==1)||(version==2)) { define_v2(); } else if (version==3) { define_v3(); } else if (version==4) { define_v4(); } else { APT_ERR_ABORT("bad spf version"); } } int affx::SpfFile::writeSpf(const std::string& fileName) { // define_file(m_spf_format); addStandardHeaders(); // if ((m_spf_format==affx::SpfFile::FMT_1)||(m_spf_format==affx::SpfFile::FMT_2)) { return writeTsv_v1(fileName); } else if ((m_spf_format==affx::SpfFile::FMT_3)||(m_spf_format==affx::SpfFile::FMT_4)) { return writeTsv_v2(fileName); } else { APT_ERR_ABORT("affx::SpfFile::writeSpf(): unknown format."); return -1; } } int affx::SpfFile::writeSpf(const std::string& fileName,int format) { m_spf_format=format; return writeSpf(fileName); } ////////// int affx::SpfFile::determineFormat() { // The "_lbl_cidx" columns are just there for the humans. // we done really care if they arent there. 
if ((m_name_cidx<0)||(m_type_cidx<0)) { m_spf_format=0; return m_spf_format; } if ((m_v2_num_blocks_cidx>=0) && (m_v2_block_sizes_cidx>=0) && (m_v2_block_annotations_cidx>=0) && (m_num_match_cidx >=0) && (m_v2_num_probes_cidx >=0) && (m_v2_probes_cidx >=0)) { m_spf_format=2; return m_spf_format; } // allele_code and context are required for v3 if ((m_v3_annotation_code_cidx>=0) && (m_v3_allele_code_cidx>=0) && (m_v3_context_code_cidx>=0) && (m_v3_probe_id_cidx>=0)) { m_spf_format=3; return m_spf_format; } // allele_code, context_code and channel_code are optional for v4 if ((m_v4_annotation_code_cidx>=0) && (m_v4_probe_id_cidx>=0)) { m_spf_format=4; return m_spf_format; } // punt! m_spf_format=0; return m_spf_format; } // on second thought, the headers arent really required. // but the return value should still be checked. // #define getRequiredHeader(_key,_val) { if (getHeader(_key,_val)!=TSV_OK) { APT_ERR_ABORT("SpfFile::readSpf(): missing header: '" _key "'") } } int affx::SpfFile::openSpf(const std::string& fileName) { if (open(fileName)!=affx::TSV_OK) { APT_ERR_ABORT("SpfFile::readSpf: Couldn't open '" + ToStr(fileName) + "' to read."); } // clearHeaderInfo(); // getHeaderAppend("chip-type",m_header_chipTypes); getHeaderAppend("chip_type",m_header_chipTypes); // getHeader("spf-format",m_header_format); getHeader("chip_type",m_header_chipTypes); getHeader("num-cols",m_header_numCols); getHeader("num-rows",m_header_numRows); getHeader("num-probesets",m_header_numProbesets); getHeader("num-channels",m_header_numChannels); // zero out the cidx info first. 
clearCidx(); // common to v1,v2,v3 m_name_cidx = cname2cidx(0,"name"); m_type_cidx = cname2cidx(0,"type"); m_num_match_cidx = cname2cidx(0,"num_match"); // v1,v2 m_v2_num_blocks_cidx = cname2cidx(0,"num_blocks"); m_v2_block_sizes_cidx = cname2cidx(0,"block_sizes"); m_v2_block_annotations_cidx = cname2cidx(0,"block_annotations"); m_v2_num_probes_cidx = cname2cidx(0,"num_probes"); m_v2_probes_cidx = cname2cidx(0,"probes"); m_v2_block_alleles_cidx = cname2cidx(0,"block_alleles"); m_v2_block_contexts_cidx = cname2cidx(0,"block_contexts"); m_v2_block_channels_cidx = cname2cidx(0,"block_channels"); m_v2_block_rep_types_cidx = cname2cidx(0,"block_rep_types"); // v3 // level 1 m_v3_allele_lbl_cidx=cname2cidx(1,"allele"); // always the string "allele" for readablity m_v3_allele_code_cidx=cname2cidx(1,"allele_code"); // level 2 m_v3_context_lbl_cidx=cname2cidx(2,"context"); // always the string "context" for readablity m_v3_context_code_cidx=cname2cidx(2,"context_code"); m_v3_annotation_code_cidx=cname2cidx(2,"annotation"); m_v3_channel_code_cidx=cname2cidx(2,"channel_code"); // level 3 m_v3_probe_id_cidx=cname2cidx(3,"probe_id"); // v4 // level 1 m_v4_block_lbl_cidx=cname2cidx(1,"block"); // always the string "block" for readablity m_v4_annotation_code_cidx=cname2cidx(1,"annotation"); m_v4_allele_code_cidx=cname2cidx(1,"allele_code"); m_v4_context_code_cidx=cname2cidx(1,"context_code"); m_v4_channel_code_cidx=cname2cidx(1,"channel_code"); m_v4_rep_type_cidx=cname2cidx(1,"rep_type"); // level 2 m_v4_probe_id_cidx=cname2cidx(2,"probe_id"); // what does this file look like? 
determineFormat(); // return affx::TSV_OK; } const char* spf_standard_headers[]={ "spf-format", "chip_type", // @todo: we should only have "chip-type" "chip-type", "num-cols", "num-rows", "num-channels", NULL }; void affx::SpfFile::addStandardHeaders() { deleteHeaders(spf_standard_headers); // addHeader("spf-format",m_spf_format); addHeader("chip_type",m_header_chipTypes); addHeader("num-cols",m_header_numCols); addHeader("num-rows",m_header_numRows); addHeader("num-probesets",m_header_numProbesets); addHeader("num-channels",m_header_numChannels); } affxparser/src/fusion/file/TsvFile/SpfFile.h0000644000175200017520000001567414516003651022041 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /// @file SpfFile.h /// @brief See the discussion in chipstream/ChipLayout.cpp for the format of the Spf file. // It should be worth noting that the difference // between v1 and v2 is the number of columns. // To get a v1 file, turn off the columns you dont want. // The difference between v2 and v3 is the v3 // format has probes on seperate lines to make them // easy to comment out. // See apt-convert-spf for examples. 
//// Required v2 column list: // name type num_blocks block_sizes block_annotations num_match num_probes probes //// Required v3 column list: // // #%header0=name type num_match // #%header1= allele_code // #%header2= annotation // #%header3= probe_id // Optional columns: // // Feature Enables column // ------------------- -------------------------- // m_has_allele_info block_alleles (v2) // allele_code (v3) // m_has_context_info block_contexts (v2) // context_code (v3) // m_has_channel_info block_channels (v2) // channel_code (v3) // m_has_rep_type_info block_rep_types (v2) // rep_type (v3) //// Version 2 Example // // #%chip_type=GenomeWideSNP_6 // #%num-probesets=1856069 // #%num-cols=2680 // #%num-rows=2572 // #%spf-format=2 // name type num_blocks block_sizes block_annotations num_match num_probes probes block_alleles block_contexts block_channels block_rep_types // AFR_010_SB 1 1 15 0 1 15 3662237,3595293,3670285,3614059,3597992,3568454,3678322,3611342,3643542,3675711,3547055,3565800,3549758,3608717,3622070 -1 -1 0 0 // AFR_012_SB 1 1 15 0 1 15 3605997,3568443,3640857,3678315,3600655,3656947,3654195,3667602,3584537,3622059,3659564,3672985,3565768,3573837,3552392 -1 -1 0 0 // AFR_035_SB 1 1 15 0 1 15 3571187,3592558,3584513,3662257,3622042,3632783,3646158,3654250,3595269,3651528,3565776,3659608,3640877,3648853,3579227 -1 -1 0 0 // AFR_054_SB 1 1 15 0 1 15 3549750,3624759,3584540,3573857,3648916,3635446,3614066,3648833,3587229,3646218,3638193,3587217,3646187,3573792,3651512 -1 -1 0 0 // AFR_057_SB 1 1 15 0 1 15 3544345,3664998,3624714,3579207,3675718,3606016,3541672,3670357,3619380,3541704,3654247,3600619,3584588,3547015,3611330 -1 -1 0 0 //// Examples from the meeting of 'Fri Apr 17 14:50:41 PDT 2009' // an example falcon genotype probe. 
// three probes, PM only, no wobble, two color, allele-nonspecific // three probes on the physical array // falconprobe1 2 2 3 0 1 6 0,1,2,0,1,2 0,1 0,0 0,1 // allele-specific G=> channel 1 // six probes on the physical array // falconprobe2 2 2 3 0 1 6 4,5,6,7,8,9 0,1 0,0 1,1 // allele-nonspecific, with context 2 alleles-channels x 2 contexts x 3 probes => 2x2x3 => 12 // six probes on the physical array // falconprobe3 2 4 3 0 1 12 11,12,13,11,12,13,0,1,2,0,1,2 0,1,0,1 0,0,1,1 0,1,0,1 //// Version 3 Example // // #%spf-format=3 // #%chip_type=GenomeWideSNP_6 // #%num-probesets=1856069 // #%num-cols=2680 // #%num-rows=2572 // #%header0=name type num_match // #%header1= allele allele_code // #%header2= context context_code channel_code annotation // #%header3= probe_id // AFFX-5Q-123 1 // allele -1 // context -1 0 0 // 3466655 // 3463975 // 3461295 // 3458615 // 3453255 // AFFX-5Q-456 1 // allele -1 // context -1 0 0 // 3466660 // 3463980 // 3461300 // 3458620 // 3455940 // AFFX-5Q-789 1 // allele -1 // context -1 0 0 // 3466665 // 3463985 // 3461305 // 3458625 //// Version 4 Example // // #%spf-format=4 // #%chip_type=GenomeWideSNP_6 // #%num-probesets=1856069 // #%num-cols=2680 // #%num-rows=2572 // #%num-FOO=? // #%num-channels=? // #%header0=name type num_match // #%header1= block annotation allele_code context_code channel_code rep_type // #%header2= probe_id // AFFX-5Q-123 1 // block -1 -1 0 0 3 // 3466655 // 3463975 // 3461295 #ifndef _SPFFILE_H_ #define _SPFFILE_H_ // #include "file/TsvFile/TsvFile.h" namespace affx { class SpfFile; } class affx::SpfFile : public affx::TsvFile { public: /// enum spf_format_t { FMT_UNSET=0, FMT_1=1, FMT_2=2, FMT_3=3, FMT_4=4, }; /// 1,2 or 3. 
int m_spf_format; // in the order they should appear std::string m_header_format; std::vector m_header_chipTypes; int m_header_numCols; int m_header_numRows; int m_header_numProbesets; int m_header_numChannels; /// enable/disable these on before calling "define_v{2,3}" to get the extra columns. int m_has_allele_info; int m_has_context_info; int m_has_channel_info; int m_has_rep_type_info; /// When openSpf is called all the "m_XXX_cidx" members are set. /// all formats v1,v2,v3,v4 // level 0 int m_name_cidx; int m_type_cidx; int m_num_match_cidx; // format v1,v2 // all level 0 int m_v2_num_blocks_cidx; int m_v2_block_sizes_cidx; int m_v2_block_annotations_cidx; int m_v2_num_probes_cidx; int m_v2_probes_cidx; int m_v2_block_alleles_cidx; int m_v2_block_contexts_cidx; int m_v2_block_channels_cidx; int m_v2_block_rep_types_cidx; // format v3 // level 1 int m_v3_allele_lbl_cidx; int m_v3_allele_code_cidx; // level 2 int m_v3_context_lbl_cidx; int m_v3_context_code_cidx; int m_v3_channel_code_cidx; int m_v3_rep_type_cidx; int m_v3_annotation_code_cidx; // level 3 int m_v3_probe_id_cidx; // format v4 // level 1 int m_v4_block_lbl_cidx; int m_v4_annotation_code_cidx; int m_v4_allele_code_cidx; int m_v4_context_code_cidx; int m_v4_channel_code_cidx; int m_v4_rep_type_cidx; // level 2 int m_v4_probe_id_cidx; // SpfFile(); // void clearOptions(); void clearHeaderInfo(); void clearCidx(); void dumpCidx(); // void setFormat(int fmt); // int determineFormat(); // int openSpf(const std::string& fileName); // selects one of the below. void define_file(int version); // v1 is v2 with no "has_XXX_info"s set. 
void define_v2(); void define_v3(); void define_v4(); // int writeSpf(const std::string& fileName); int writeSpf(const std::string& fileName,int format); // void addStandardHeaders(); private: void define_v2_cols(); }; #endif affxparser/src/fusion/file/TsvFile/TsvFile.cpp0000644000175200017520000036552714516003651022425 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /// @file TsvFile.cpp /// @brief The implementation of TsvFile // #include "file/TsvFile/TsvFile.h" // #include "util/Convert.h" #include "util/Err.h" #include "util/Fs.h" #include "util/Util.h" #include "util/Verbose.h" // #include #include #include #include #include #include #include #include #include // using namespace std; using namespace affx; // macros for TSV_ERR_AT stringification. #define TSV_STRINGIFY(_str) #_str #define TSV_TOSTR(_sym) TSV_STRINGIFY(_sym) // used to report where the error occured. // @todo should set the error code too. 
#define TSV_ERR_ABORT(_msg) APT_ERR_ABORT(_msg); #ifdef TSV_USE_RDBUF #define M_GETC() (m_rdbuf->sbumpc()) #define M_PEEK() (m_rdbuf->sgetc()) #define M_UNGETC(c) (m_rdbuf->sputbackc(c)) #else #define M_GETC() (m_fileStream.get()) #define M_PEEK() (m_fileStream.peek()) #define M_UNGETC(c) (m_fileStream.putback(c)) #endif // #define TSV_SET_ERR(err) (setError(err))) #define TSV_SET_ERR(err) (m_errno=err) // Chars literials in numeric form (so they arent converted // by the compiler) should be an enum? /// Hex code for TAB char #define TSV_CHAR_TAB (0x09) /// Hex code for LF char #define TSV_CHAR_LF (0x0a) /// Hex code for a COMMA char #define TSV_CHAR_COMMA (0x2c) /// Hex code for CR char #define TSV_CHAR_CR (0x0d) /// Hex code for comment char #define TSV_CHAR_COMMENT (0x23) /// Hex code for SPACE char #define TSV_CHAR_SPACE (0x20) /// code for end-of-file signal #define TSV_CHAR_EOF (-1) /// Comment string #define TSV_STRING_COMMENT "#" /// Meta string #define TSV_STRING_META "#%" /// Unix End-of-line #define TSV_EOL_UNIX "\n" /// Windows End-of-line #define TSV_EOL_WIN "\r\n" // be nice to less fortunate users... // use the native line endings #ifdef WIN32 #define TSV_EOL TSV_EOL_WIN #else #define TSV_EOL TSV_EOL_UNIX #endif //////////////////// /// @brief Trim whitespace from the left of a string /// @param str the string to trim void affx::ltrim(std::string& str) { string::size_type pos=str.find_first_not_of(" \t"); if (pos==std::string::npos) { str.erase(); } else { str.erase(0,pos); } } /// @brief Trim whitespace from the right of a string /// @param str the string to trim void affx::rtrim(std::string& str) { string::size_type pos=str.find_last_not_of(" \t"); if (pos==std::string::npos) { str.erase(); } else { str.erase(pos+1); } } /// @brief Trim whitespace from both sides of a string /// @param str the string to trim void affx::trim(std::string& str) { ltrim(str); rtrim(str); } // Turn a string into a lowercase string. 
std::string affx::tolower(const string& str) { std::string str_lc=str; for (size_t i=0;i& vec) { string::size_type pos1=0; string::size_type pos2=0; int num=0; vec.clear(); while (1) { pos2=str.find(c,pos1); if (pos2==string::npos) { vec.resize(num+1); vec[num].assign(str,pos1,str.size()-pos1); break; } else { vec.resize(num+1); vec[num].assign(str,pos1,pos2-pos1); pos1=pos2+1; } ++num; } return num; } /// @brief Convert the escaped char to the char which it represents /// @param c the escaped char /// @return the converted char int affx::unescapeChar(int c) { switch (c) { case 'b': return 8 ; // backspace case 'e': return 27 ; // escape case 'f': return 12 ; // form feed case 'n': return TSV_CHAR_LF ; case 'r': return TSV_CHAR_CR ; case 't': return TSV_CHAR_TAB; case 'v': return 11; // vertical tab // return the character unchanged. default: return c; } } int affx::escapeChar(int c) { switch (c) { case 8: return 'b' ; // backspace case 27: return 'e' ; // escape case 12: return 'f' ; // form feed case TSV_CHAR_LF: return 'n' ; case TSV_CHAR_CR: return 'r' ; case TSV_CHAR_TAB: return 't' ; case 11: return 'v' ; // vertical tab case '#': return '#' ; // comment char // no escape needed. default: return 0; } } std::string affx::escapeString(const std::string& str,const char eChar) { int i_max=str.size(); std::string estring; estring.reserve(i_max); for (int i=0;ierase(); val->append(m_buffer.begin(),m_buffer.end()); return TSV_OK; } /// @brief Get the value as an int /// @param val val to set /// @return tsv_error_t int affx::TsvFileField::get(short* val) { int tmp_val; int rv; // reset the val. *val=-1; // rv=get(&tmp_val); if (rv!=TSV_OK) { return rv; } if ((tmp_val<-32767)||(tmp_val>32767)) { return TSV_ERR_CONVERSION; } *val=tmp_val; return TSV_OK; } /// @brief Get the value as an int /// @param val val to set /// @return tsv_error_t int affx::TsvFileField::get(int* val) { TSV_NULL_RETURN(0); // done already? 
if (m_value_int_done) { *val=m_value_int; return m_value_int_rv; } // const char* ptr_start=m_buffer.c_str(); char* ptr_end=NULL; /// @todo: "" should be an error? // do the conversion (Force base 10) m_value_int=strtol(ptr_start,&ptr_end,10); // If m_ptr_end is not NULL then there was an invalid character. // If m_ptr_end == m_ptr_start then it was the null string. ("") if ((*ptr_end!=0)||(ptr_end==ptr_start)) { m_value_int=-1; // fill in with something off the end m_value_int_rv=TSV_ERR_CONVERSION; // signal an error } else { m_value_int_rv=TSV_OK; } m_value_int_done=true; *val=m_value_int; return m_value_int_rv; } /// @brief Get the value as a double /// @param val val to set /// @return tsv_error_t int affx::TsvFileField::get(double* val) { TSV_NULL_RETURN(0.0); // done already? if (m_value_double_done) { *val=m_value_double; return m_value_double_rv; } // const char* ptr_start=m_buffer.c_str(); char* ptr_end=NULL; // do the conversion m_value_double=strtod(ptr_start,&ptr_end); // same checks as strtol if ((*ptr_end!=0)||(ptr_end==ptr_start)) { m_value_double=-1.0; // bad value m_value_double_rv=TSV_ERR_CONVERSION; // signal an error } else { m_value_double_rv=TSV_OK; } m_value_double_done=true; *val=m_value_double; return m_value_double_rv; } /// @brief Get the value as a float /// @param val val to set /// @return tsv_error_t int affx::TsvFileField::get(float* val) { if (m_value_double_done) { *val=(float)m_value_double; return m_value_double_rv; } // Get the double value and cast it. double tmp_double; int rv; rv=get(&tmp_double); *val=(float)tmp_double; return rv; } /// @brief Get the value as an unsigned int /// @param val val to set /// @return tsv_error_t int affx::TsvFileField::get(unsigned int* val) { TSV_NULL_RETURN(0); // done already? 
if (m_value_uint_done) { *val=m_value_uint; return m_value_uint_rv; } // const char* m_ptr_start=m_buffer.c_str(); char* m_ptr_end=NULL; // do the conversion m_value_uint=strtoul(m_ptr_start,&m_ptr_end,10); if ((*m_ptr_end)!=0) { m_value_uint=0; // fill in zero on error. m_value_uint_rv=TSV_ERR_CONVERSION; // signal it. } else { m_value_uint_rv=TSV_OK; } m_value_uint_done=true; *val=m_value_uint; return m_value_uint_rv; } /// @brief Get the value as an uint64_t /// @param val val to set /// @return tsv_error_t int affx::TsvFileField::get(uint64_t* val) { TSV_NULL_RETURN(0); // done already? if (m_value_ulonglong_done) { *val=m_value_ulonglong; return m_value_ulonglong_rv; } // const char* m_ptr_start=m_buffer.c_str(); char* m_ptr_end=NULL; // do the conversion #ifndef WIN32 m_value_ulonglong=strtoull(m_ptr_start,&m_ptr_end,10); #else m_value_ulonglong=_strtoui64(m_ptr_start,&m_ptr_end,10); #endif if ((*m_ptr_end)!=0) { m_value_ulonglong=0; // fill in zero on error. m_value_ulonglong_rv=TSV_ERR_CONVERSION; // signal it. 
} else { m_value_ulonglong_rv=TSV_OK; } m_value_ulonglong_done=true; *val=m_value_ulonglong; return m_value_ulonglong_rv; } ///// #define TSV_SPLIT_VEC_BODY(TYPE) \ int rv=affx::TSV_OK; \ string::size_type pos1; \ string::size_type pos2; \ char* pos_out; \ TYPE val; \ \ vec->clear(); \ \ if (m_buffer=="") { \ return rv; \ } \ \ pos1=0; \ while (1) { \ pos2=m_buffer.find(sep,pos1); \ if (pos2==string::npos) { \ val=CONVERT_FUNC(); \ vec->push_back(val); \ break; \ } else { \ val=CONVERT_FUNC(); \ vec->push_back(val); \ pos1=pos2+1; \ } \ } \ return rv; int affx::TsvFileField::get(std::vector* vec,char sep) { #define CONVERT_FUNC() strtol(&m_buffer[pos1],&pos_out,10) TSV_SPLIT_VEC_BODY(int); #undef CONVERT_FUNC } int affx::TsvFileField::get(std::vector* vec,char sep) { #define CONVERT_FUNC() (float)strtod(&m_buffer[pos1],&pos_out) TSV_SPLIT_VEC_BODY(float); #undef CONVERT_FUNC } int affx::TsvFileField::get(std::vector* vec,char sep) { #define CONVERT_FUNC() strtod(&m_buffer[pos1],&pos_out) TSV_SPLIT_VEC_BODY(double); #undef CONVERT_FUNC } int affx::TsvFileField::get(std::vector* vec,char sep) { int rv=affx::TSV_OK; string::size_type pos1; string::size_type pos2; std::string val; vec->clear(); if (m_buffer=="") { return rv; } pos1=0; while (1) { pos2=m_buffer.find(sep,pos1); if (pos2==string::npos) { val.assign(m_buffer,pos1,m_buffer.size()-pos1); vec->push_back(val); break; } else { val.assign(m_buffer,pos1,pos2-pos1); vec->push_back(val); pos1=pos2+1; } } return rv; } ////////// #define TSV_CONCAT_VEC_BODY() { \ if (vec.size()>0) { \ stream << vec[0]; \ for (size_t i=1;i& vec,char sep) { std::ostringstream stream; TSV_CONCAT_VEC_BODY(); } int affx::TsvFileField::set(const std::vector& vec,char sep) { std::ostringstream stream; TSV_CONCAT_VEC_BODY(); } int affx::TsvFileField::set(const std::vector& vec,char sep) { std::ostringstream stream; stream.setf(ios::fixed, ios::floatfield); stream.precision(m_precision); TSV_CONCAT_VEC_BODY(); } int 
affx::TsvFileField::set(const std::vector& vec,char sep) { std::ostringstream stream; stream.setf(ios::fixed, ios::floatfield); stream.precision(m_precision); TSV_CONCAT_VEC_BODY(); } #undef TSV_CONCAT_VEC_BODY ////////// /// @brief set the value of the buffer /// @param val the new string value /// @return tsv_return_t int affx::TsvFileField::set(const std::string& val) { return setBuffer(val); } /// @brief set the value of the buffer /// @param val the new value /// @return tsv_return_t /// @todo dont use ostringstream, use something faster int affx::TsvFileField::set(int val) { m_value_int=val; m_value_int_done=true; m_value_int_rv=TSV_OK; m_val_state=affx::VALSTATE_INT; return TSV_OK; } #define TSV_CONVERT_FLOAT_BODY() { \ std::ostringstream stream; \ stream.setf(ios::fixed, ios::floatfield); \ stream.precision(m_precision); \ stream << val; \ return setBuffer(stream.str()); \ } /// @brief set the value of the buffer /// @param val the new value /// @return tsv_return_t /// @todo dont use ostringstream, use something faster int affx::TsvFileField::set(float val) { //TSV_CONVERT_FLOAT_BODY(); m_value_double=val; m_value_double_done=true; m_value_double_rv=TSV_OK; m_val_state=affx::VALSTATE_DOUBLE; return affx::TSV_OK; } /// @brief set the value of the buffer /// @param val the new value /// @return tsv_return_t /// @todo dont use ostringstream, use something faster int affx::TsvFileField::set(double val) { m_value_double=val; m_value_double_done=true; m_value_double_rv=affx::TSV_OK; m_val_state=affx::VALSTATE_DOUBLE; return affx::TSV_OK; } #undef TSV_CONVERT_FLOAT_BODY /// @brief set the value of the buffer /// @param val the new value /// @return tsv_return_t /// @todo dont use ostringstream, use something faster int affx::TsvFileField::set(unsigned int val) { // not used as often, just convert it to a string now. 
std::ostringstream stream; stream << val; return setBuffer(stream.str()); } /// @brief set the value of the buffer /// @param val the new value /// @return tsv_return_t /// @todo dont use ostringstream, use something faster int affx::TsvFileField::set(uint64_t val) { // not used as often, just convert it to a string now. std::ostringstream stream; stream << val; return setBuffer(stream.str()); } ////////// /// @brief Add the var to the collection of linked vars /// @param var var to add void affx::TsvFileField::linkedvar_push(affx::TsvFileBinding* var) { m_linkedvar_vec.push_back(var); } /// @brief Clear the linkages to bound variables. void affx::TsvFileField::linkedvars_clear() { m_linkedvar_vec.clear(); } /// @brief Assign each linked var a value. void affx::TsvFileField::linkedvars_assign(affx::TsvFile* tsvfile) { // Assign to the linked vars size_t lvar_vec_size=m_linkedvar_vec.size(); int rv=TSV_OK; for (size_t vi=0;vim_ptr_int!=NULL) { rv=get(var->m_ptr_int); } else if (var->m_ptr_string!=NULL) { rv=get(var->m_ptr_string); } else if (var->m_ptr_float!=NULL) { rv=get(var->m_ptr_float); } else if (var->m_ptr_double!=NULL) { rv=get(var->m_ptr_double); } else if (var->m_ptr_uint!=NULL) { rv=get(var->m_ptr_uint); } else if (var->m_ptr_ulonglong!=NULL) { rv=get(var->m_ptr_ulonglong); } else { TSV_ERR_ABORT("internal error: Binding does not have a pointer!"); } // if ((rv!=TSV_OK)&&(var->m_flags&TSV_BIND_REQUIRED)) { // We say "data line" to make it clear we arent counting header lines. 
std::string errmsg="Conversion error\n"; errmsg+="TSVERROR: '"+tsvfile->m_fileName+"':"+ToStr(tsvfile->lineNumber()+1)+": "; errmsg+="Conversion of required field '"+m_cname+"'"+ " to "+var->get_type_str()+ " of '"+m_buffer+"' failed!\n"; TSV_ERR_ABORT(errmsg); } } } } std::string affx::TsvFileField::get_bound_types_str() { std::string rv=""; size_t lvar_vec_size=m_linkedvar_vec.size(); for (size_t vi=0;vim_ptr_int!=NULL) { rv+="int,"; } else if (var->m_ptr_string!=NULL) { rv+="string,"; } else if (var->m_ptr_float!=NULL) { rv+="float,"; } else if (var->m_ptr_double!=NULL) { rv+="double,"; } else if (var->m_ptr_uint!=NULL) { rv+="uint,"; } else if (var->m_ptr_ulonglong!=NULL) { rv+="longlong,"; } } } // trim trailing "," if (rv!="") { rv.erase(rv.size()-1); } return rv; } /* Unused -- code which wants this generally uses int affx::TsvFileField::setInternCacheSize(int size) { int old_size=m_interncache_size; // -1 means ignore this setting. if (size==TSV_INTERNSIZE_UNSET) { return old_size; } // Zero disables it, positive size enables it m_interncache_size=size; m_interncache_vec.reserve(size); if (size<(int)m_interncache_vec.size()) { // this is not random replacement but is quick. 
m_interncache_vec.resize(size); } // return old_size; } */ //////////////////// /// @brief Create a TsvFileIndex affx::TsvFileIndex::TsvFileIndex() { init(); } /// @brief Destroy a TsvFileIndex affx::TsvFileIndex::~TsvFileIndex() { } /// @brief Init the member vars to known values void affx::TsvFileIndex::init() { m_bindto_cidx=-1; m_bindto_clvl=-1; m_bindto_cname=""; m_bindto_type=TSV_BINDTO_NONE; m_flags=0; m_kind=0; // clear(); } /// @brief Clear the contents of the index void affx::TsvFileIndex::clear() { m_done=false; m_index_str2line.clear(); m_index_int2line.clear(); m_index_double2line.clear(); m_index_uint2line.clear(); m_index_ulonglong2line.clear(); } /// @brief Add the data from field to the index /// @param field The field to index /// @param line The line number on which it was found void affx::TsvFileIndex::data_add(TsvFileField* field,linenum_t line) { APT_ERR_ASSERT(field!=NULL,"internal error: data_add: field is null."); // if (m_kind==TSV_INDEX_STRING) { // WARNING: field->m_buffer is reused and will be overwritten. // So we need to force a fresh copy to be made by modifying the string. // so first we "copy" the string. 
// std::string str_copy=field->m_buffer; // then we modify it -- this forces a copy // str_copy.append(1,' '); // then we undo the modification // str_copy.resize(str_copy.size()-1); // now we can use it in our map std::string str_copy; str_copy.append(field->m_buffer.begin(),field->m_buffer.end()); m_index_str2line.insert(make_pair(str_copy,line)); return; } if (m_kind==TSV_INDEX_INT) { int tmp_int; if (field->get(&tmp_int)==TSV_OK) { m_index_int2line.insert(make_pair(tmp_int,line)); } return; } if (m_kind==TSV_INDEX_DOUBLE) { double tmp_double; if (field->get(&tmp_double)==TSV_OK) { m_index_double2line.insert(make_pair(tmp_double,line)); } return; } if (m_kind==TSV_INDEX_UINT) { unsigned int tmp_uint; if (field->get(&tmp_uint)==TSV_OK) { m_index_uint2line.insert(make_pair(tmp_uint,line)); } return; } if (m_kind==TSV_INDEX_ULONGLONG) { uint64_t tmp_ulonglong; if (field->get(&tmp_ulonglong)==TSV_OK) { m_index_ulonglong2line.insert(make_pair(tmp_ulonglong,line)); } return; } } /// @brief Template to query an index for matching values /// @param results Where to stick the line numbers /// @param T1 Datatype of the maps /// @param map map to search /// @param op comparison operator /// @param val value to compare against /// @return tsv_return_t template int affx::TsvFileIndex::results_append_tmpl(std::vector& results, std::multimap& mmap, int op, T1& val) { typename std::multimap::iterator r_start,r_end; if (op==TSV_OP_LT) { r_start=mmap.begin(); r_end=mmap.lower_bound(val); } else if (op==TSV_OP_LTEQ) { r_start=mmap.begin(); r_end=mmap.upper_bound(val); } else if (op==TSV_OP_EQ) { r_start=mmap.lower_bound(val); r_end=mmap.upper_bound(val); } else if (op==TSV_OP_GTEQ) { r_start=mmap.lower_bound(val); r_end=mmap.end(); } else if (op==TSV_OP_GT) { r_start=mmap.upper_bound(val); r_end=mmap.end(); } else { TSV_ERR_ABORT("Invalid operation in results_append. 
op="+ToStr(op)); return TSV_ERR_UNKNOWN; } // Stick the range into find_result while (r_start!=r_end) { //printf("r_start=(%4d,%4d)\n",(*r_start).first,(int)(*r_start).second); results.push_back((*r_start).second); r_start++; } // return TSV_OK;; } /// @brief Append the matching string results to the vec /// @param results where to append /// @param op comparison to do /// @param val value to compare to /// @return tsv_return_t int affx::TsvFileIndex::results_append(std::vector& results,int op,std::string val) { // Use the template for the different types return results_append_tmpl(results,m_index_str2line,op,val); } /// @brief Append the matching string results to the vec /// @param results where to append /// @param op comparison to do /// @param val value to compare to /// @return tsv_return_t int affx::TsvFileIndex::results_append(std::vector& results,int op,int val) { return results_append_tmpl(results,m_index_int2line,op,val); } /// @brief Append the matching string results to the vec /// @param results where to append /// @param op comparison to do /// @param val value to compare to /// @return tsv_return_t int affx::TsvFileIndex::results_append(std::vector& results,int op,double val) { return results_append_tmpl(results,m_index_double2line,op,val); } /// @brief Append the matching string results to the vec /// @param results where to append /// @param op comparison to do /// @param val value to compare to /// @return tsv_return_t int affx::TsvFileIndex::results_append(std::vector& results,int op,unsigned int val) { return results_append_tmpl(results,m_index_uint2line,op,val); } /// @brief Append the matching string results to the vec /// @param results where to append /// @param op comparison to do /// @param val value to compare to /// @return tsv_return_t int affx::TsvFileIndex::results_append(std::vector& results,int op,uint64_t val) { return results_append_tmpl(results,m_index_ulonglong2line,op,val); } /// @brief Dump the contents of an index /// @param 
T1 Datatype of the maps key /// @param map the map to dump template void affx::TsvFileIndex::dump_map(std::multimap& mmap) { typename std::multimap::iterator i; int cnt=0; for (i=mmap.begin();i!=mmap.end();i++) { cout << cnt++ << " : '" << (*i).first << "' : '" << (*i).second << "'\n"; } } /// @brief Dump the contents of the index void affx::TsvFileIndex::dump() { printf("index (clvl=%2d,cidx=%2d,kind=%2d) ==========\n",m_bindto_clvl,m_bindto_cidx,m_kind); if (m_kind==TSV_INDEX_STRING) { dump_map(m_index_str2line); } if (m_kind==TSV_INDEX_INT) { dump_map(m_index_int2line); } if (m_kind==TSV_INDEX_DOUBLE) { dump_map(m_index_double2line); } if (m_kind==TSV_INDEX_UINT) { dump_map(m_index_uint2line); } if (m_kind==TSV_INDEX_ULONGLONG) { dump_map(m_index_ulonglong2line); } } //////////////////// /// @brief Init the slots of the object. affx::TsvFileHeaderLine::TsvFileHeaderLine() { m_key=""; m_value=""; m_order=0; } /// @brief Init the slots to the values given. affx::TsvFileHeaderLine::TsvFileHeaderLine(const std::string& key,const std::string& value,int order) { m_key=key; m_value=value; m_order=order; } /// @brief Less than operator for comparing TsvFileHeaderLines bool affx::TsvFileHeaderLine::operator<(const affx::TsvFileHeaderLine& b) const { if (m_order==b.m_order) { return (m_keysetEscapeOk(whatEverYouWant) // #ifdef WIN32 // m_optEscapeOk=false; // #else m_optEscapeOk=true; // #endif m_optEscapeChar='\\'; m_optQuoteChar1='\''; m_optQuoteChar2='"'; m_optFieldSep=TSV_CHAR_TAB; m_optHasColumnHeader=true; m_optQuoteChar='"'; m_optThrowOnError=false; m_optEndl=TSV_EOL; m_optLinkVarsOnOpen = true; m_headName = "header"; m_optPrecision=-1; } /// @brief Set the option abortonerror /// @param value the new value. void affx::TsvFile::setAbortOnError(bool value) { m_optAbortOnError=value; } /// @brief Returns the value of m_eof. /// @return should be true when at the end of the file. bool affx::TsvFile::eof() { return m_eof; } /// @brief What line did we just read? 
(external) /// @return The line number of the line which was just read. linenum_t affx::TsvFile::lineNumber() { // line_num is the NEXT LINE to be read, so subtract one return m_lineNum-1; } /// @brief The indentation level of the line just read. /// @return the level int affx::TsvFile::lineLevel() { return m_lineLvl; } /// @brief Set the filename for this Tsv /// @param filename /// @return int affx::TsvFile::setFilename(const std::string& filename) { m_fileName=filename; return TSV_OK; } /// @brief init a new TsvFile. void affx::TsvFile::init() { #ifdef TSV_USER_RDBUF m_rdbuf=NULL; #endif m_headers_curptr=NULL; } /// @brief clear all the resources of a TsvFile. void affx::TsvFile::clear() { // close the file before zapping everything... close(); // ...zap the filename m_fileName.clear(); // dispose of all the bindings. clearBindings(); // and the indexes clearIndexes(); // and the headers clearHeaders(); // and the column headers clearColumnHeaders(); // m_lineLvl=0; m_lineNum=0; m_eof=false; m_index_done=false; } /// @brief Clear the bindings of columns to variables. void affx::TsvFile::clearBindings() { // clear the linked vars in the columns linkvars_clear(); // release the memory of the vars for (unsigned int vi=0;viclear(); delete m_index_vec[i]; } } } /// @brief Clear all the fields. (set them all to null) void affx::TsvFile::clearFields() { clearFieldsBelowClvl(0); } /// @brief Clear the columns at greater indents /// @param clvl_start Clear fields at or larger than this level. void affx::TsvFile::clearFieldsBelowClvl(int clvl_start) { assert(clvl_start>=0); unsigned int clvl_size=m_column_map.size(); for (unsigned int clvl=clvl_start;clvl=0)&&(clvl<(int)m_column_map.size())) { return (int)m_column_map[clvl].size(); } return 0; } /// @brief Are we looking at the end of a line? /// @return true if at end of line. bool affx::TsvFile::f_lookingat_eol() { int c; // the end of a file is the end of a line. 
if (!m_fileStream.good()) { return true; } // //c=m_fileStream.peek(); c=M_PEEK(); if (c==EOF) { return true; } if ((c==TSV_CHAR_CR)||(c==TSV_CHAR_LF)) { return true; } return false; } /// @brief Read a line from a TsvFile into a string. /// @param line A buffer to modify /// @return tsv_return_t /// @remarks Lines end with (0x0a "\n") (0x0d "\r") or (0x0d 0x0a "\r\n") int affx::TsvFile::f_getline(std::string& line) { int c; line.clear(); // we have some sort of error if (!m_fileStream.good()) { return (TSV_ERR_FILEIO); } while (1) { // get a char... //c=m_fileStream.get(); c=M_GETC(); // eof? if ((c==TSV_CHAR_EOF)||!m_fileStream.good()) { break; } // LF ends a line if (c==TSV_CHAR_LF) { break; } // CR ends a line too. if (c==TSV_CHAR_CR) { // c=m_fileStream.peek(); c=M_PEEK(); // CR could be followed by a LF if (c==TSV_CHAR_LF) { //c=m_fileStream.get(); c=M_GETC(); break; } } // push it on line.append(1,(char)c); } // return TSV_OK; } ////////// /// @brief clear all the headers. void affx::TsvFile::clearHeaders() { m_headers_bykey.clear(); // int v_size=(int)m_headers_vec.size(); for (int vi=0;vifirst!=key)) { return TSV_HEADER_LAST; } // what we are returning... val=m_headers_iter->second->m_value; // point to the next one... m_headers_iter++; return TSV_OK; } /// @brief Advance to the next header of the file and set the values /// @param key key of the header /// @param val value of the header /// @return int affx::TsvFile::headersNext(std::string& key,std::string& val) { // skip comment headers do { nextHeaderPtr(); if (m_headers_curptr==NULL) { return TSV_HEADER_LAST; } } while (m_headers_curptr->m_key==""); key=m_headers_curptr->m_key; val=m_headers_curptr->m_value; // headers next should never return an empty key. assert(key!=""); return TSV_OK; } /// @brief How many headers are there? /// @return Count of the headers int affx::TsvFile::headersCount() { // test here to save a method call. 
if (m_headers_vec_packed==false) { repackHeaders(); } return (int)m_headers_vec.size(); } /// @brief Is the key a legal key to have in a header? /// @param key the key to check /// @return tsv_return_t int affx::TsvFile::headerKeyLegal(const std::string& key) { // We dont allow '=' in the key if (key.find('=')!=std::string::npos) { return (TSV_ERR_FORMAT); } // Nor do we allow headers which start with "header" -- they are reserved if ((key.size()==(m_headName.length()+1))&&(key.find(m_headName)==0)) { return (TSV_ERR_FORMAT); } // if not rejected it is ok... return TSV_OK; } /// @brief Add a key-value to the headers /// @param key string key /// @param val string value /// @remarks sort order defaults to "0" int affx::TsvFile::addHeader(const std::string& key,const std::string& val) { // should a copy be done here? // might that be the source of the windows problems? return addHeader(key,val,(int)m_headers_vec.size()); } /// @brief Add a key-value to the headers plus its sort order. /// @param key string key /// @param val string val /// @param order the sort order /// @return tsv_error_t int affx::TsvFile::addHeader(const std::string& key,const std::string& val,int order) { int rv; if ((rv=headerKeyLegal(key))!=TSV_OK) { return rv; } return addHeader_nocheck(key,val,order); } /// @brief Add a int value as a header /// @param key name of header /// @param val value to set /// @return int affx::TsvFile::addHeader(const std::string& key,int val) { // cast to a string and add it. std::ostringstream stream; stream << val; addHeader(key,stream.str()); return TSV_OK; } /// @brief Add a int value as a header /// @param key name of header /// @param val double value to set /// @return int affx::TsvFile::addHeader(const std::string& key,double val) { // cast to a string and add it. 
std::ostringstream stream; stream << val; std::string valString = stream.str(); // For Windows, check to see if a three digit exponent with a leading zero is // being used for double representation. if so, remove the extra zero (2 // exponent digits conforms to 1999 C standard) //e.g. 1.234e003 and 1.234e-003 become 1.234e03 and 1.234e-03 #ifdef WIN32 if (valString.size() >= 5 && valString.at(valString.size() - 3) == '0' && (valString.at(valString.size() - 4) == 'e' || valString.at(valString.size() - 5) == 'e' ) ) { valString.erase(valString.size() - 3, 1); } #endif addHeader(key,valString); return TSV_OK; } /// @brief Add a series of headers as key=val1, key=val2, key=valN /// @param key name of header /// @param val vector of values to add. /// @return int affx::TsvFile::addHeader(const std::string& key,const std::vector& val) { for (size_t i=0;i& comments) { for (size_t i=0;i& key_vec) { string val; for (int i=0;i<(int)key_vec.size();i++) { if (f_tsv.getHeader(key_vec[i],val)==TSV_OK) { addHeader(prefix+key_vec[i],val); } } return TSV_OK; } /// @brief Note that the "headerN" lines may not have been generated yet. /// @return The number of headers for this file. int affx::TsvFile::headerCount() { repackHeaders(); // this removes null ptrs so headerCount==size() return m_headers_vec.size(); } /// @brief Get the value of a header /// @param key which header to get /// @param val string to bind the value to /// @return tsv_return_t int affx::TsvFile::getHeader(const std::string& key,std::string& val) { m_headers_iter=m_headers_bykey.find(key); if (m_headers_iter==m_headers_bykey.end()) { return TSV_ERR_NOTFOUND; } val=(*m_headers_iter).second->m_value; return TSV_OK; } /// @brief Get the integer value of a header /// @param key key to get /// @param val integer value to be modfied. 
/// @return tsv_error_t int affx::TsvFile::getHeader(const std::string& key,int& val) { std::string tmp_str; const char* str_in; char* str_out; if (getHeader(key,tmp_str)==TSV_OK) { str_in=tmp_str.c_str(); int tmp_val=strtol(str_in,&str_out,0); if (str_in==str_out) { return TSV_ERR_FORMAT; } val=tmp_val; return TSV_OK; } return TSV_ERR_NOTFOUND; } /// @brief Get the integer value of a header /// @param key key to get /// @param val integer value to be modfied. /// @return tsv_error_t int affx::TsvFile::getHeader(const std::string& key,double& val) { std::string tmp_str; const char* str_in; char* str_out; if (getHeader(key,tmp_str)==TSV_OK) { str_in=tmp_str.c_str(); double tmp_val=strtod(str_in,&str_out); if (str_in==str_out) { return TSV_ERR_FORMAT; } val=tmp_val; return TSV_OK; } return TSV_ERR_NOTFOUND; } /// @brief Get string values of all headers exactly matching a key /// @param key key to find /// @param val string vector for matching values. /// @return tsv_error_t int affx::TsvFile::getHeader(const std::string& key,std::vector& val) { val.clear(); return getHeaderAppend(key,val); } /// @brief Append string values of all headers exactly matching a key /// @param key key to find /// @param val string vector for appended values. /// @return tsv_error_t int affx::TsvFile::getHeaderAppend(const std::string& key,std::vector& val) { int result = TSV_ERR_NOTFOUND; affx::TsvFile::header_iter_t i; for (i=m_headers_bykey.find(key);(i!=m_headers_bykey.end())&&(i->first==key);i++) { val.push_back(i->second->m_value); result = affx::TSV_OK; } return result; } /// @brief Get string values of all headers containing the substring /// @param key key substring to find /// @param val string vector for matching values. 
/// @return tsv_error_t int affx::TsvFile::getHeaderMatchingKeySubstr(const std::string& key,std::vector& val) { val.clear(); return getHeaderMatchingKeySubstrAppend(key,val); } /// @brief Append string values of all headers containing the substring /// @param key key substring to find /// @param val string vector for appended values. /// @return tsv_error_t int affx::TsvFile::getHeaderMatchingKeySubstrAppend(const std::string& key,std::vector& val) { int result = TSV_ERR_NOTFOUND; affx::TsvFile::header_iter_t i; for (i=m_headers_bykey.begin();i!=m_headers_bykey.end();i++) { if (i->first.find(key)!=string::npos) { val.push_back(i->second->m_value); result = affx::TSV_OK; } } return result; } /// @brief Does the file have a key==value header which matches /// @param key key to check /// @param val value to check /// @return true if there is a matching key==val header /// @remarks This is handy to check for matching chip types. int affx::TsvFile::hasHeaderEqualTo(const std::string& key,const std::string& val) { // printf("hasHeaderEqualTo('%s','%s')\n",key.c_str(),val.c_str()); // scan the headers. affx::TsvFile::header_iter_t i; for (i=m_headers_bykey.find(key);(i!=m_headers_bykey.end())&&(i->first==key);i++) { // printf("hasHeaderEqualTo: check '%s'/'%s')\n",i->first.c_str(),i->second->m_value.c_str()); if (i->second->m_value==val) { return TSV_OK; } } // didnt find it. return TSV_ERR_NOTFOUND; } void affx::TsvFile::repackHeaders() { if (m_headers_vec_packed==true) { return; } // std::vector tmp_vec; std::vector::iterator i; // save the non-null ones... for (i=m_headers_vec.begin();i!=m_headers_vec.end();i++) { if (*i!=NULL) { tmp_vec.push_back(*i); } } // ...and put them back. 
m_headers_vec=tmp_vec; // we are packed m_headers_vec_packed=true; } /// @brief Sort the headers of this TSV file by (order,key,value) void affx::TsvFile::resortHeaders() { // repackHeaders(); // put headers in order sort(m_headers_vec.begin(),m_headers_vec.end(),affx::header_ptr_less); } /// @brief Remove the headers named with the key. /// @param key the header key value. /// @return tsv_error_t int affx::TsvFile::deleteHeaders(const std::string& key) { int v_size=(int)m_headers_vec.size(); for (int vi=0;vim_key==key)) { deleteHeaderPtr(m_headers_vec[vi]); } } return affx::TSV_OK; } int affx::TsvFile::deleteHeaders(const std::vector& keys) { for (size_t i=0;isecond==hdrptr) { m_headers_bykey.erase(mi); goto START_DEL_HEADER_PTR; } } // erase it from the vector, dont resize it. int find_cnt=0; int v_size=(int)m_headers_vec.size(); for (int vi=0;vi0) { m_headers_vec_packed=false; delete hdrptr; return TSV_OK; } // didnt find it in the list. return TSV_ERR_NOTFOUND; } ////////// /// @brief Read a classic "v1" header from the top of a file /// @return tsv_return_t /// @remarks Attempts to guess if this is a tab or comma seperated file int affx::TsvFile::f_read_header_v1() { int rv; std::string line; std::vector colname_vec; if ((rv=f_getline(line))!=TSV_OK) { return rv; } // Try and sense what the fieldsep is? if (m_optAutoSenseSep==true) { int cnt_fieldsep=countchars(line,m_optFieldSep); if (cnt_fieldsep==0) { // whoah We didnt see our fieldsep, take a guess... int cnt_tabs=countchars(line,TSV_CHAR_TAB); int cnt_comma=countchars(line,TSV_CHAR_COMMA); if (cnt_tabs>0) { m_optFieldSep=TSV_CHAR_TAB; } else if (cnt_comma>0) { m_optFieldSep=TSV_CHAR_COMMA; } } } // set the columns splitstr(line,m_optFieldSep,colname_vec); for (unsigned int col=0;col colname_vec; splitstr(i->second->m_value,m_optFieldSep,colname_vec); for (unsigned int col=0;col=(int)m_column_map.size()) { // plus one as we are going to use 'clvl' as an index... 
m_column_map.resize(clvl+1); m_cnametocidx_map.resize(clvl+1); } // expand the width if needed. if (cidx>=(int)m_column_map[clvl].size()) { m_column_map[clvl].resize(cidx+1); } // record info about this column m_column_map[clvl][cidx].m_clvl=clvl; m_column_map[clvl][cidx].m_cidx=cidx; m_column_map[clvl][cidx].m_cname=cname; m_column_map[clvl][cidx].m_ctype=ctype; // map column name to column idx m_cnametocidx_map[clvl][cname]=cidx; // default precision if set if (m_optPrecision>=0) { m_column_map[clvl][cidx].setPrecision(m_optPrecision); } return (TSV_OK); } /// @brief Look up the cidx of a column /// @param clvl column level at which to look /// @param cname column name /// @return level or less than zero on error int affx::TsvFile::cname2cidx(int clvl,const std::string& cname,tsv_optionflag_t options) { if ((clvl<0)||(clvl>=(int)m_column_map.size())) { return TSV_ERR_NOTFOUND; } map::iterator i; i=m_cnametocidx_map[clvl].find(cname); // found it? if (i!=m_cnametocidx_map[clvl].end()) { return ((*i).second); } // is this a case sensitve match? if ((options&affx::TSV_OPT_CASEINSENSTIVE)!=affx::TSV_OPT_CASEINSENSTIVE) { // yes... so fail now. return TSV_ERR_NOTFOUND; } // Doing a case-insenstive search is rare, we dont keep an index. // Rather, we do a linear search across the keys. std::string cname_lc=affx::tolower(cname); std::string key_lc; for (i=m_cnametocidx_map[clvl].begin();i!=m_cnametocidx_map[clvl].end();i++) { key_lc=affx::tolower((*i).first); if (key_lc==cname_lc) { // found a lowercase match... return ((*i).second); } } // isnt there as lowercase either. return TSV_ERR_NOTFOUND; } // Requres "clvl" and "options" to be defined. #define CNAME_FIND(_name) { int rv; rv=cname2cidx(clvl,_name,options); if (rv>=0) { return rv; } } /// @brief Find a column index by one of its names. 
/// @param clvl level /// @param alias1 first alias to try /// @param alias2 second alias to try /// @return int affx::TsvFile::cname2cidx(int clvl, const std::string& alias1, const std::string& alias2, tsv_optionflag_t options) { CNAME_FIND(alias1); CNAME_FIND(alias2); return TSV_ERR_NOTFOUND; } /// @brief Find a column index by one of its names. /// @param clvl level /// @param alias1 first alias to try /// @param alias2 second alias to try /// @param alias3 third alias to try /// @return int affx::TsvFile::cname2cidx(int clvl, const std::string& alias1, const std::string& alias2, const std::string& alias3, tsv_optionflag_t options) { CNAME_FIND(alias1); CNAME_FIND(alias2); CNAME_FIND(alias3); return TSV_ERR_NOTFOUND; } /// @brief Find a column index by one of its names. /// @param clvl level /// @param alias1 first alias to try /// @param alias2 second alias to try /// @param alias3 third alias to try /// @param alias4 fourth alias to try /// @return int affx::TsvFile::cname2cidx(int clvl, const std::string& alias1, const std::string& alias2, const std::string& alias3, const std::string& alias4, tsv_optionflag_t options) { CNAME_FIND(alias1); CNAME_FIND(alias2); CNAME_FIND(alias3); CNAME_FIND(alias4); return TSV_ERR_NOTFOUND; } /// @brief Covert a column index to a column index /// @param clvl level /// @param cidx index /// @return column index /// @remarks This exists to make the templates eaiser to write int affx::TsvFile::cname2cidx(int clvl,int cidx) { if ((clvl<0)||(clvl>=(int)m_column_map.size())) { return TSV_ERR_NOTFOUND; } return cidx; } ////////// /// @brief Get a pointer to a column given clvl and cidx /// @param clvl level of column /// @param cidx index of column /// @return NULL if not found TsvFileField* affx::TsvFile::clvlcidx2colptr(int clvl,int cidx) { if ((clvl<0)|| (clvl>=(int)m_column_map.size())|| (cidx<0)|| (cidx>=(int)m_column_map[clvl].size())) { return NULL; } return &m_column_map[clvl][cidx]; } /// @brief Return a pointer to the 
column /// @param clvl level of column /// @param cname name of column /// @return NULL if not found TsvFileField* affx::TsvFile::clvlcidx2colptr(int clvl,const std::string& cname) { return clvlcidx2colptr(clvl,cname2cidx(clvl,cname)); } /// @brief Get the name of a column by its level and index /// @param clvl column level /// @param cidx column index /// @param cname where to put the name /// @return tsv_return_t int affx::TsvFile::cidx2cname(int clvl,int cidx,std::string& cname) { TsvFileField* colptr=clvlcidx2colptr(clvl,cidx); if (colptr==NULL) { return (TSV_ERR_NOTFOUND); } cname=colptr->m_cname; return TSV_OK; } std::string affx::TsvFile::getColumnName(int clvl,int cidx) { std::string cname; cidx2cname(clvl,cidx,cname); return cname; } int affx::TsvFile::setPrecision(int clvl,int cidx,int places) { TsvFileField* colptr=clvlcidx2colptr(clvl,cidx); if (colptr==NULL) { return (TSV_ERR_NOTFOUND); } return colptr->setPrecision(places); } int affx::TsvFile::setPrecision(int clvl,const std::string& cname,int places) { return setPrecision(clvl,cname2cidx(clvl,cname),places); } ////////// /// @brief open a TSV file /// @param fname name of file to open /// @return TSV_OK or an error int affx::TsvFile::open(const std::string& fname) { int rv; close(); // remeber the name m_fileName=fname; // if (m_fileName.empty()) { TSV_ERR_ABORT("Cant open an empty filename. (filename='"+m_fileName+"')"); } // should we check to see if this file is wacked out? if (m_optCheckFormatOnOpen) { if (Fs::isCalvinFile(m_fileName)) { TSV_ERR_ABORT("This file is a Calvin file. (filename='"+m_fileName+"')"); return TSV_ERR_CALVIN; } if (Fs::isHdf5File(m_fileName)) { TSV_ERR_ABORT("This file is an HDF5 file. (filename='"+m_fileName+"')"); return TSV_ERR_HDF5; } // test for binary last, as it is a general test. // Failing for this text file, commented out for Windows. // //26357 15-Dec-2010 19:09:40 FATAL ERROR:TsvFile.cpp:2664: This file appears to be binary. 
(filename='../../regression-data/data/idata/translation/regression-data/20080109_31set_DMET3_cn_16various.txt') #ifndef _WIN32 if (Fs::isBinaryFile(m_fileName)) { TSV_ERR_ABORT("This file appears to be binary. (filename='"+m_fileName+"')"); return TSV_ERR_BINARY; } #endif } // clear before opening -- errors are sticky. m_fileStream.clear(); // hello?!? Fs::aptOpen(m_fileStream,fname,std::fstream::in|std::fstream::binary); // Check an error opening the file. if(!m_fileStream.is_open() || !m_fileStream.good()) { if (m_optAbortOnError) { TSV_ERR_ABORT("open: Could not open file: '" + fname + "' to read."); } else { return (TSV_ERR_FILEIO); } } // now throw an exception if something really bad happens. m_fileStream.exceptions(ios_base::badbit|ios_base::failbit); #ifdef TSV_USE_RDBUF // This appears to be faster than calling ".get()" m_rdbuf=m_fileStream.rdbuf(); #endif //printf("### opening: '%s' (rdstate=%4d,%s)...\n", // m_fileName.c_str(), // m_fileStream.rdstate(), // (m_fileStream.good()?"good":"not good")); //file_buf=m_fileStream.rdbuf(); if (!m_fileStream.good()) { return (TSV_ERR_FILEIO); } if ( m_rawOpen ) { return TSV_OK; } // parse the headers f_read_headers(); // establish the links if (m_optLinkVarsOnOpen) { rv=formatOk(); if (rv!=TSV_OK) { Verbose::out(1, "TSV: warning: '" + fname + "': bad format (=" + ToStr(rv) + ")"); // printf("TSV: warning: '%s': bad format (=%d)\n",fname.c_str(),rv); } } // redudant with above? linkvars_maybe(); return TSV_OK; } /// @brief Open a table file. 
Tables dont have column headers /// @param fname /// @return int affx::TsvFile::openTable(const std::string& fname) { m_optHasColumnHeader=false; m_optAutoColumns=true; // return open(fname); } /// @brief /// @param fname /// @return int affx::TsvFile::openCsv(const std::string& fname) { m_optAutoColumns=true; m_optAutoDequote=true; m_optAutoSenseSep=false; // must be ',' m_optEscapeChar='\\'; // The escape char is '\' m_optEscapeOk=true; m_optFieldSep=','; // set the field sep char m_optHasColumnHeader=true; // return open(fname); } /// @brief Close the TsvFile int affx::TsvFile::close() { // close an open stream if (m_fileStream.is_open()) { m_fileStream.close(); } return TSV_OK; } /// @brief Flushs the data to the file. /// If you are debugging, you might want to explictly flush the output. /// It is not required. Be warned that this slows down the IO. void affx::TsvFile::flush() { if (m_fileStream.is_open()) { m_fileStream.flush(); } } /// @brief Gobble up the tabs without skipping over eol /// @return the number of tabs skipped int affx::TsvFile::f_advance_tabs() { int c; int tabcnt=0; int maxtabs=(int)(m_column_map.size()-1); m_line_fpos=m_fileStream.tellg(); // first nibble off expected tabs... 
while (tabcnt 0) { TSV_ERR_ABORT("Error in: '" + m_fileName + "'" + " at line " + ToStr(m_lineNum) + ":" + " Not allowed to have comment characters except on first character of line."); } M_UNGETC(c); return TSV_LINE_COMMENT; } else if ((c==TSV_CHAR_CR)||(c==TSV_CHAR_LF)) { //m_fileStream.putback((char)c); M_UNGETC(c); return (TSV_LINE_BLANK); } else { // m_fileStream.putback((char)c); M_UNGETC(c); break; } } // skip possibly extra whitespace //c=m_fileStream.peek(); c=M_PEEK(); if (c==TSV_CHAR_COMMENT) { return (TSV_LINE_COMMENT); } if ((c==TSV_CHAR_CR)||(c==TSV_CHAR_LF)||(c==EOF)) { return (TSV_LINE_BLANK); } if ((c==TSV_CHAR_SPACE)||(c==TSV_CHAR_TAB)) { std::fstream::pos_type skipstart; int spaceCnt = -1; skipstart=m_fileStream.tellg(); // do { //c=m_fileStream.get(); c=M_GETC(); spaceCnt++; } while ((c==TSV_CHAR_SPACE)||(c==TSV_CHAR_TAB)); // what are we looking at now? if (c==TSV_CHAR_COMMENT) { //m_fileStream.putback(c); if(spaceCnt > 0) { TSV_ERR_ABORT("Error in: " + m_fileName + " at line: " + ToStr(m_lineNum) + "." " Not allowed to have comment characters except on first character of line."); } M_UNGETC(c); return (TSV_LINE_COMMENT); } if ((c==TSV_CHAR_CR)||(c==TSV_CHAR_LF)||(c==EOF)) { //m_fileStream.putback(c); M_UNGETC(c); return (TSV_LINE_BLANK); } // found a normal char, rewind to start of skip m_fileStream.seekg(skipstart); } return tabcnt; } /// @brief advance over the end of the line (eol) /// @return the number of chars read to get there (not including eol chars) /// @remarks The eol chars are not included in the count as there could be 1 or 2 int affx::TsvFile::f_advance_eol() { int charcnt=0; int c; if (!m_fileStream.good()) { return (TSV_ERR_FILEIO); } /// @todo this is duplicated from getline but we dont want to accum the data so ok? while (1) { // get a char... // c=m_fileStream.get(); c=M_GETC(); // eof? //if (!m_fileStream.good()) { // break; //} if (c==EOF) { break; } // LF ends a line if (c==TSV_CHAR_LF) { break; } // CR ends a line too. 
if (c==TSV_CHAR_CR) { //c=m_fileStream.peek(); c=M_PEEK(); // CR could be followed by a LF if (c==TSV_CHAR_LF) { //c=m_fileStream.get(); c=M_GETC(); break; } } // count the chars charcnt++; } // we just went forward a line m_lineNum++; // return charcnt; } /// @brief Read data into a column from the file /// @param col column to fill /// @return tsv_return_t /// @remarks Dont read past the end of the line. /// Only use f_advance_eol to do that. int affx::TsvFile::f_read_column(TsvFileField* col) { int c; //char* buf_ptr; //int buf_len; int bi; char in_quotes; // assert(col!=NULL); // mark this column as being null. col->m_isnull=true; col->m_value_int_done=false; col->m_value_double_done=false; col->m_value_uint_done=false; col->m_value_ulonglong_done=false; // Nothing to be read. if (!m_fileStream.good()) { col->m_buffer=""; return (TSV_ERR_FILEIO); } // Mark it as not being null. col->m_isnull=false; in_quotes=0; // The common case is the field is the same size as last time. // Copy the chars into an already existing string. bi=0; col->m_buffer.resize(0); while (1) { //c=m_fileStream.get(); c=M_GETC(); // if ((m_optEscapeChar==c)&&(m_optEscapeOk==true)) { //c=m_fileStream.get(); c=M_GETC(); c=unescapeChar(c); } // start or end of quotes? else if (((c==m_optQuoteChar1)&&(m_optQuoteChar1!=0))|| ((c==m_optQuoteChar2)&&(m_optQuoteChar2!=0))) { if (in_quotes==0) { // open in_quotes=c; } else if (c==in_quotes) { // close in_quotes=0; } } else if ((m_optFieldSep==c)&&(in_quotes==0)) { col->m_buffer.resize(bi); // trunc the buffer to size. break; // discard the field-sep-char } // end of field? 
(short value) else if ((c==TSV_CHAR_CR)||(c==TSV_CHAR_LF)) { //m_fileStream.putback(c); // leave eol char M_UNGETC(c); col->m_buffer.resize(bi); // trunc break; } // ran out of data else if (c==EOF) { if (bi==0) { col->m_isnull=true; // revise our opinion about this being non-null } col->m_buffer.resize(bi); return (TSV_ERR_FILEIO); } // add to our buffer col->m_buffer.append(1,(char)c); // buffer full, append bi++; } // dequote the column? if (col->m_optAutoDequote==true) { trim(col->m_buffer); dequote(col->m_buffer); } //printf("z\n"); fflush(NULL); return TSV_OK; } /// @brief Read the columns of this level /// @param line_clvl column level at which we are reading /// @return tsv_return_t int affx::TsvFile::f_read_columns(int line_clvl) { assert(line_clvl>=0); //clearFieldsBelowClvl(line_clvl+1); // profiling if (line_clvl<(int)m_column_map.size()) { unsigned int cidx_size=m_column_map[line_clvl].size(); for (unsigned int cidx=0;cidxislinked=false; TsvFileField* col=clvlcidx2colptr(clvl,cidx); if (col==NULL) { // out of bounds m_linkvars_errors++; return; } // found the column, copy over info col->linkedvar_push(var); var->islinked=true; } /// @brief Link a binding var to a column /// @param var var to link void affx::TsvFile::linkvar_link(TsvFileBinding* var) { assert(var!=NULL); //printf("link=%p errs=%d\n",var,m_linkvars_errors); if (var->m_bindto_type==TSV_BINDTO_NONE) { // dead } else if (var->m_bindto_type==TSV_BINDTO_CIDX) { // by index linkvar_column(var->m_bindto_clvl,var->m_bindto_cidx,var); } else if (var->m_bindto_type==TSV_BINDTO_CNAME) { // by name int cidx=cname2cidx(var->m_bindto_clvl,var->m_bindto_cname); if (cidx>=0) { linkvar_column(var->m_bindto_clvl,cidx,var); } } else { TSV_ERR_ABORT("linkvar_link: internal error: m_bindto_type="+ToStr(var->m_bindto_type)); } // check the binding condition... 
if (var->islinked==true) { // no need to check return; } if ((var->m_flags&TSV_BIND_REQUIRED)!=0) { m_linkvars_errors++; if (m_optAbortOnError) { TSV_ERR_ABORT("Required column: '"+ToStr(var->m_bindto_cname)+"' not found in file: '"+m_fileName+"'."); } return; } if ((var->m_flags&TSV_BIND_WARNING)!=0) { m_linkvars_warnings++; return; } } /// @brief Examine the list of bindings and establish the linkages /// @return number of unlinked bindings int affx::TsvFile::linkvars_makelinks() { // zap the current linkages linkvars_clear(); // walk the vars and link them all unsigned int m_linkvars_vec_size=m_linkvars_vec.size(); for (unsigned int vi=0;viislinked) { // oh no! if ((m_linkvars_vec[i]->m_flags&(TSV_BIND_WARNING|TSV_BIND_REQUIRED))!=0) { Verbose::out(2, "TSV: warning: did not bind ('" + ToStr(m_linkvars_vec[i]->m_bindto_clvl) + "," + ToStr(m_linkvars_vec[i]->m_bindto_cidx) + ",'" + m_linkvars_vec[i]->m_bindto_cname +"')"); } } } } /// @brief Remove all the bindings. void affx::TsvFile::unbindAll() { clearBindings(); } ////////// /// macro to define code #define TSV_BIND_FUNC(Xtype,Xfield) \ int \ affx::TsvFile::bind(int clvl,int cidx,Xtype* ptr,tsv_bindopt_t flags,int interncache_size) \ { \ TsvFileBinding* var=linkvar_alloc(); \ var->m_bindto_type=TSV_BINDTO_CIDX; \ var->m_bindto_clvl=clvl; \ var->m_bindto_cidx=cidx; \ var->m_flags=flags; \ var->m_interncache_size=interncache_size; \ var->Xfield=ptr; \ \ linkvars_maybe(); \ \ return TSV_OK; \ } \ int \ affx::TsvFile::bind(int clvl,const std::string& cname,Xtype* ptr,tsv_bindopt_t flags,int interncache_size) \ { \ TsvFileBinding* var=linkvar_alloc(); \ var->m_bindto_type=TSV_BINDTO_CNAME; \ var->m_bindto_clvl=clvl; \ var->m_bindto_cname=cname; \ var->m_flags=flags; \ var->m_interncache_size=interncache_size; \ var->Xfield=ptr; \ \ linkvars_maybe(); \ \ return TSV_OK; \ } // Expand the above TSV_BIND_FUNC(std::string,m_ptr_string); TSV_BIND_FUNC(int,m_ptr_int); TSV_BIND_FUNC(double,m_ptr_double); 
TSV_BIND_FUNC(float,m_ptr_float); TSV_BIND_FUNC(unsigned int,m_ptr_uint); TSV_BIND_FUNC(uint64_t,m_ptr_ulonglong); // dont need this any more. #undef TSV_BIND_FUNC ////////// /// @brief Rewind back to the start of the data and clear the fields /// @return tsv_return_t int affx::TsvFile::rewind() { clearFields(); m_fileStream.clear(); m_fileStream.seekg(m_fileDataPos); // m_lineLvl=0; m_lineNum=0; m_eof=false; // if (!m_fileStream.good()) { m_eof=true; return (TSV_ERR_FILEIO); } return TSV_OK; } /// @brief advance to the next line and bind the values /// @return tsv_return_t int affx::TsvFile::nextLine() { // insure the vars are inplace // we do this after the bind calls // linkvars_maybe(); // some sort of error or eof? //if ((!m_fileStream.good())||(m_fileStream.peek()==EOF)) { if ((!m_fileStream.good())||(M_PEEK()==EOF)) { m_eof=true; return (TSV_ERR_FILEIO); } // insure the vars are inplace //linkvars_maybe(); // count the tabs to figure out what level it is at m_lineLvl=f_advance_tabs(); if (m_lineLvl<0) { f_advance_eol(); return TSV_OK; } // Read the data into the columns return f_read_columns(m_lineLvl); } /// @brief Skip to the next level of data which matches seek_clvl /// @param seek_clvl the level to skip to /// @return tsv_return_t /// @remarks nextLevel will skip over lines which are more indented than /// seek_clvl, but will not skip over lines less indented. int affx::TsvFile::nextLevel(int seek_clvl) { char c = '\0'; // insure the vars are inplace // linkvars_maybe(); // Read lines until we find a line of the correct level. while (1) { if (!m_fileStream.good()) { clearFields(); m_eof=true; return (TSV_ERR_FILEIO); } c = M_PEEK(); // speed tweak to avoid uncessary seek. Unless this is the 0 level // the first character should be a tab. This handles a common case // in pgf files. 
if(seek_clvl != 0 && c != TSV_CHAR_TAB && c != TSV_CHAR_COMMENT) { return TSV_LEVEL_LAST; } else if (c == EOF) { clearFields(); return TSV_ERR_EOF; } // what level is this line at? m_lineLvl=f_advance_tabs(); // skip the line if it isnt a data line. if (m_lineLvl<0) { f_advance_eol(); continue; } // a match; bind and return if (m_lineLvl==seek_clvl) { return f_read_columns(m_lineLvl); } // a master; dont skip it if (m_lineLvlseek_clvl) { f_advance_eol(); } } return (TSV_ERR_UNKNOWN); } ////////// /// @brief Read a couple of lines of the TsvFile and figure out the types. /// @return int affx::TsvFile::deduce_types() { std::vector clvl_seen; int clvl_seen_cnt; int clvl_max; int clvl; int cidx_max; int cidx; int seen_all_levels=0; // start from the beginning. rewind(); // clvl_seen.resize(getLevelCount(),0); clvl_seen_cnt=0; clvl_max=getLevelCount(); while (nextLine()==affx::TSV_OK) { clvl=lineLevel(); // skip if we have done this level already... if (clvl_seen[clvl]==1) { continue; } // mark being seen. 
clvl_seen[clvl]=1; clvl_seen_cnt++; // scan the columns cidx_max=getColumnCount(clvl); for (cidx=0;cidxm_buffer.size(); if (col_cur_size>col->m_max_size) { col->m_max_size=col_cur_size; } } } // rewind and return rewind(); return affx::TSV_OK; } /// @brief Return the declared type of this column /// @param clvl /// @param cname /// @return affx::tsv_type_t affx::TsvFile::get_type(int clvl,const std::string& cname) { TsvFileField* col=clvlcidx2colptr(clvl,cname); if (col==NULL) { return (TSV_TYPE_ERR); } return col->get_type(); } /// @brief Return the declared type of this column /// @param clvl /// @param cidx /// @return affx::tsv_type_t affx::TsvFile::get_type(int clvl,int cidx) { TsvFileField* col=clvlcidx2colptr(clvl,cidx); if (col==NULL) { return (TSV_TYPE_ERR); } return col->get_type(); } /// @brief Return the declared type of this column /// @param clvl /// @param cname /// @return affx::tsv_type_t affx::TsvFile::set_type(int clvl,const std::string& cname,affx::tsv_type_t type) { TsvFileField* col=clvlcidx2colptr(clvl,cname); if (col==NULL) { return (TSV_TYPE_ERR); } return col->set_type(type); } /// @brief Return the declared type of this column /// @param clvl /// @param cidx /// @return affx::tsv_type_t affx::TsvFile::set_type(int clvl,int cidx,affx::tsv_type_t type) { TsvFileField* col=clvlcidx2colptr(clvl,cidx); if (col==NULL) { return (TSV_TYPE_ERR); } return col->set_type(type); } ////////// //! Handy snippet of code. 
#define TSV_GET_VALUE_CIDX() \ TsvFileField* col=clvlcidx2colptr(clvl,cidx); \ if ((col==NULL)||(col->isNull())) { \ return (TSV_ERR_NOTFOUND); \ } \ return col->get(&val); /// @brief Get the string value of a field /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// @param val The string in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,int cidx,std::string& val) { TSV_GET_VALUE_CIDX(); } int affx::TsvFile::get(int clvl,int cidx,short& val) { TSV_GET_VALUE_CIDX(); } /// @brief Get the integer value of a field /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// @param val The integer in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,int cidx,int& val) { TSV_GET_VALUE_CIDX(); } /// @brief Get the double value of a field /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// @param val The double in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,int cidx,double& val) { TSV_GET_VALUE_CIDX(); } /// @brief Get the float value of a field /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// @param val The float in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,int cidx,float& val) { TSV_GET_VALUE_CIDX(); } /// @brief Get the unsigned integer value of a field /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// @param val The integer in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,int cidx,unsigned int& val) { TSV_GET_VALUE_CIDX(); } /// @brief Get the unsigned integer value of a field /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// 
@param val The integer in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,int cidx,uint64_t& val) { TSV_GET_VALUE_CIDX(); } #undef TSV_GET_VALUE_CIDX /// Handy code to get the value of a column #define TSV_GET_VALUE_CNAME() \ TsvFileField* col=clvlcidx2colptr(clvl,cname); \ if ((col==NULL)||(col->isNull())) { \ return (TSV_ERR_NOTFOUND); \ } \ return col->get(&val); /// @brief Get the string value of a field /// @param clvl The column level level of field /// @param cname the column name /// @param val The string in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,const std::string& cname,std::string& val) { TSV_GET_VALUE_CNAME(); } int affx::TsvFile::get(int clvl,const std::string& cname,short& val) { TSV_GET_VALUE_CNAME(); } /// @brief Get the integer value of a field /// @param clvl The column level level of field /// @param cname the column name /// @param val The integer in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,const std::string& cname,int& val) { TSV_GET_VALUE_CNAME(); } /// @brief Get the double value of a field /// @param clvl The column level level of field /// @param cname the column name /// @param val The double in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,const std::string& cname,double& val) { TSV_GET_VALUE_CNAME(); } /// @brief Get the float value of a field /// @param clvl The column level level of field /// @param cname the column name /// @param val The float in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,const std::string& cname,float& val) { TSV_GET_VALUE_CNAME(); } /// @brief Get the unsigned integer value of a field /// @param clvl The column level level of field /// @param cname the column name /// @param val The integer in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,const 
std::string& cname,unsigned int& val) { TSV_GET_VALUE_CNAME(); } /// @brief Get the unsigned integer value of a field /// @param clvl The column level level of field /// @param cname the column name /// @param val The integer in which to store the value /// @return a tsv_return_t code int affx::TsvFile::get(int clvl,const std::string& cname,uint64_t& val) { TSV_GET_VALUE_CNAME(); } // cleanup #undef TSV_GET_VALUE_CNAME ////////// #define TSV_GET_VEC_BODY() \ TsvFileField* col=clvlcidx2colptr(clvl,cidx); \ if ((col==NULL)||(col->isNull())) { \ return (TSV_ERR_NOTFOUND); \ } \ return col->get(vec,sep) #define TSV_GET_VEC_FUNC(TYPE) \ int \ affx::TsvFile::get(int clvl,int cidx,std::vector* vec,char sep) \ { \ TSV_GET_VEC_BODY(); \ } \ int \ affx::TsvFile::get(int clvl,const std::string& cidx,std::vector* vec,char sep) \ { \ TSV_GET_VEC_BODY(); \ } TSV_GET_VEC_FUNC(std::string); TSV_GET_VEC_FUNC(int); TSV_GET_VEC_FUNC(float); TSV_GET_VEC_FUNC(double); #undef TSV_GET_VEC_BODY #undef TSV_GET_VEC_FUNC ////////// /// Handy code to get the value of a column #define TSV_SET_VALUE_BODY() \ TsvFileField* col=clvlcidx2colptr(clvl,cidx); \ if (col==NULL) { \ return (TSV_ERR_NOTFOUND); \ } \ return col->set(val); #define TSV_SET_VALUE_FUNC(TYPE) \ int \ affx::TsvFile::set(int clvl,int cidx,TYPE val) \ { \ TSV_SET_VALUE_BODY(); \ } \ int \ affx::TsvFile::set(int clvl,const std::string& cidx,TYPE val) \ { \ TSV_SET_VALUE_BODY(); \ } // TSV_SET_VALUE_FUNC(const std::string&) TSV_SET_VALUE_FUNC(int) TSV_SET_VALUE_FUNC(unsigned int) TSV_SET_VALUE_FUNC(float) TSV_SET_VALUE_FUNC(double) TSV_SET_VALUE_FUNC(uint64_t) // dont need it any more. #undef TSV_SET_VALUE_BODY #undef TSV_SET_VALUE_FUNC //// Macros for vectors of data. 
#define TSV_SET_VEC_BODY() \ TsvFileField* col=clvlcidx2colptr(clvl,cidx); \ if (col==NULL) { \ return (TSV_ERR_NOTFOUND); \ } \ return col->set(vec,sep); #define TSV_SET_VEC_FUNC(TYPE) \ int \ affx::TsvFile::set(int clvl,int cidx,const std::vector& vec,char sep) \ { \ TSV_SET_VEC_BODY() \ } \ int \ affx::TsvFile::set(int clvl,const std::string& cidx,const std::vector& vec,char sep) \ { \ TSV_SET_VEC_BODY() \ } // define them. TSV_SET_VEC_FUNC(std::string) TSV_SET_VEC_FUNC(int); TSV_SET_VEC_FUNC(float); TSV_SET_VEC_FUNC(double); #undef TSV_SET_VEC_BODY #undef TSV_SET_VEC_FUNC ////////// /// @brief Is the field set? /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// @return false if set, otherwise true bool affx::TsvFile::isNull(int clvl,int cidx) { TsvFileField* col=clvlcidx2colptr(clvl,cidx); if (col==NULL) { return true; } return col->isNull(); } /// @brief Is the field set? /// @param clvl The column level level of field /// @param cname The column index (0-based) of the field /// @return false if set, otherwise true bool affx::TsvFile::isNull(int clvl,const std::string& cname) { TsvFileField* col=clvlcidx2colptr(clvl,cname); if (col==NULL) { return true; } return col->isNull(); } /// @brief Is the field empty (null or no value)? /// @param clvl The column level level of field /// @param cidx The column index (0-based) of the field /// @return false if set, otherwise true bool affx::TsvFile::isEmpty(int clvl,int cidx) { TsvFileField* col=clvlcidx2colptr(clvl,cidx); if (col==NULL) { return true; } return col->isEmpty(); } /// @brief Is the field set? 
/// @param clvl The column level level of field /// @param cname The column index (0-based) of the field /// @return false if set, otherwise true bool affx::TsvFile::isEmpty(int clvl,const std::string& cname) { TsvFileField* col=clvlcidx2colptr(clvl,cname); if (col==NULL) { return true; } return col->isEmpty(); } //////////////////// /// @brief Dump the meta-information of the tsvfile /// @remark Used for debugging void affx::TsvFile::dump() { // printf("TsvDump====================\n"); printf("== tsv: filename='%s'\n",m_fileName.c_str()); dump_headers(); dump_bindings(); printf("====================\n"); } /// @brief Dump the header values of the tsvfile /// @remark Used for debugging void affx::TsvFile::dump_headers() { printf("=== headers:\n"); affx::TsvFile::header_t* h_ptr; for (int hi=0;hi<(int)m_headers_vec.size();hi++) { h_ptr=m_headers_vec[hi]; printf("==header: "); if (h_ptr==NULL) { printf("NULL\n"); continue; } if (h_ptr->m_key=="") { printf("%2d:#%s\n",h_ptr->m_order,h_ptr->m_value.c_str()); } else { printf("%2d:#%%%s=%s\n",h_ptr->m_order,h_ptr->m_key.c_str(),h_ptr->m_value.c_str()); } } // for (unsigned int clvl=0;clvlm_bindto_clvl,var->m_bindto_cidx,var->m_bindto_cname.c_str()); } } /// @brief Dump the data of the tsvfile /// @remark Used for debugging void affx::TsvFile::dump_data() { string tmpstr; while (nextLine()==TSV_OK) { int clvl=lineLevel(); printf("%02d :",clvl); for (int cidx=0;cidxdump(); } } } //////////////////// /// @brief Allocate a TsvFileIndex /// @return pointer to the new index TsvFileIndex* affx::TsvFile::index_alloc() { TsvFileIndex* idx=new TsvFileIndex; m_index_vec.push_back(idx); // we will need to index m_index_done=false; return idx; } /// @brief Creates an index object /// @param clvl column level /// @param cname column name /// @param kind TSV_INDEX_STRING or TSV_INDEX_INT /// @param flags unused at the moment /// @return TSV_OK /// @remark The indexing is not done until it is actually needed int 
affx::TsvFile::defineIndex(int clvl,const std::string& cname,int kind,int flags) { TsvFileIndex* idx=index_alloc(); idx->m_bindto_type=TSV_BINDTO_CNAME; idx->m_bindto_clvl=clvl; idx->m_bindto_cname=cname; // assert(kind!=0); idx->m_kind=kind; idx->m_flags=flags; // return TSV_OK; } /// @brief Creates an index object /// @param clvl column level /// @param cidx column /// @param kind /// @param flags /// @return int affx::TsvFile::defineIndex(int clvl,int cidx,int kind,int flags) { TsvFileIndex* idx=index_alloc(); idx->m_bindto_type=TSV_BINDTO_CIDX; idx->m_bindto_clvl=clvl; idx->m_bindto_cidx=cidx; // assert(kind!=0); idx->m_kind=kind; idx->m_flags=flags; // return TSV_OK; } /// @brief Seek to the start of a line but dont read it. /// @param line line to seek to /// @return tsv_return_t int affx::TsvFile::seekLine(linenum_t line) { // indexBuildMaybe(); // off the end? if (line>m_index_linefpos.size()) { assert(0); return (TSV_ERR_NOTFOUND); } m_fileStream.clear(); m_fileStream.seekg(m_index_linefpos[line]); // we seeked to this line, but did not read it. m_lineNum=line; // if (!m_fileStream.good()) { return (TSV_ERR_FILEIO); } return TSV_OK; } /// @brief goto a line reading the parent lines and then the line itself /// @param line /// @return tsv_return_t /// @remarks When we goto a line, we need to goto the parents first /// to set the parent values. This presents a "read consistent" /// view of the data. 
int affx::TsvFile::gotoLine(linenum_t line) { vector::iterator lb; linenum_t sline; // do some bookkeeping first clearFields(); indexBuildMaybe(); //printf("gotoLine(%d)===\n",line); if (line>=m_index_linefpos.size()) { int maxLine = (int)m_index_linefpos.size(); if (m_optAbortOnError) { TSV_ERR_ABORT("Cant goto line " + ToStr(line) + " of " + ToStr(maxLine)); } return (TSV_ERR_NOTFOUND); } // Goto each parent lines first // This will bind the parent values so the view of the data is consistent for (unsigned int clvl=0;clvlline)) { sline=*(lb-1); } else { sline=*lb; } // move and parse //printf("%2d : seek(%d)\n",clvl,sline); seekLine(sline); nextLine(); // done? if (sline==line) { break; } } return TSV_OK; } int affx::TsvFile::countTotalDataLines() { int line_cnt=0; // start at the beginning. rewind(); // count them up. while (nextLine()==affx::TSV_OK) { // data line levels are >=0 if (lineLevel()>=0) { line_cnt++; } } // back to the beginning so we leave the file at a known state. rewind(); return line_cnt; } void affx::TsvFile::currentLineAsString(std::string& line) { for (int t=0;tclear(); // pre-compute the cidxes of the indexes which are by name if (idx->m_bindto_type==TSV_BINDTO_CNAME) { idx->m_bindto_cidx=cname2cidx(idx->m_bindto_clvl,idx->m_bindto_cname); } } } // remember where we are before we rewind excursion_fpos=m_fileStream.tellg(); rewind(); unsigned int m_index_vec_size=m_index_vec.size(); int l_num; while (nextLine()==TSV_OK) { l_num=m_lineNum-1; // because nextLine went past it. 
// line->fpos m_index_linefpos.push_back(m_line_fpos); // if m_lineLvl is <0, then it was a comment or blank line // and there no need to do the rest //printf("L1: %4d : %4d : %u\n",l_num,m_lineLvl,(unsigned int)m_line_fpos); if (m_lineLvl<0) { // skip this line continue; } // parent-child lines m_index_lineclvl[m_lineLvl].push_back(l_num); //std::string key; //std::string val; //get(0,0,key); //get(0,1,val); //printf("L2: K:'%s' V:'%s'\n",key.c_str(),val.c_str()); // for (unsigned int i=0;im_bindto_clvl==m_lineLvl)&& (idx->m_bindto_cidx>=0)) { idx->data_add(&m_column_map[m_lineLvl][idx->m_bindto_cidx],l_num); } } } // m_fileStream.clear(); m_fileStream.seekg(excursion_fpos); // mark the indexes as done m_index_done=true; for (unsigned int i=0;im_done=true; } } return TSV_OK; } /// @brief build the indexes if needed /// @return tsv_return_t int affx::TsvFile::indexBuildMaybe() { if (m_index_done) { return TSV_OK; } return indexBuild(); } /// @brief Find an index which matches what we need /// @param clvl clvl /// @param cidx cidx /// @param kind What is being matched? /// @return NULL if no index found TsvFileIndex* affx::TsvFile::index_matching(int clvl,int cidx,int kind) { TsvFileIndex* idx; for (unsigned int i=0;im_bindto_clvl==clvl)&& (idx->m_bindto_cidx==cidx)&& (idx->m_kind==kind)) { return idx; } } return NULL; } /// @brief Do we have a index on this column for strings? /// @param clvl clvl /// @param cidx cidx /// @param val a dummy value for typedness /// @return NULL or a pointer to the index TsvFileIndex* affx::TsvFile::index_matching_1(int clvl,int cidx,std::string& val) { return index_matching(clvl,cidx,TSV_INDEX_STRING); } /// @brief Do we have a index on this column for ints? 
/// @param clvl clvl /// @param cidx cidx /// @param val a dummy value for typedness /// @return NULL or a pointer to the index TsvFileIndex* affx::TsvFile::index_matching_1(int clvl,int cidx,int val) { return index_matching(clvl,cidx,TSV_INDEX_INT); } /// @brief Do we have a index on this column for doubles? /// @param clvl clvl /// @param cidx cidx /// @param val a dummy value for typedness /// @return NULL or a pointer to the index TsvFileIndex* affx::TsvFile::index_matching_1(int clvl,int cidx,double val) { return index_matching(clvl,cidx,TSV_INDEX_DOUBLE); } /// @brief Do we have a index on this column for unsigned ints? /// @param clvl clvl /// @param cidx cidx /// @param val a dummy value for typedness /// @return NULL or a pointer to the index TsvFileIndex* affx::TsvFile::index_matching_1(int clvl,int cidx,unsigned int val) { return index_matching(clvl,cidx,TSV_INDEX_UINT); } /// @brief Do we have a index on this column for uint64_t? /// @param clvl clvl /// @param cidx cidx /// @param val a dummy value for typedness /// @return NULL or a pointer to the index TsvFileIndex* affx::TsvFile::index_matching_1(int clvl,int cidx,uint64_t val) { return index_matching(clvl,cidx,TSV_INDEX_ULONGLONG); } /// @brief Clear the results of the last findBegin /// @return tsv_return_t int affx::TsvFile::findResultsClear() { m_findresults_pos=0; m_findresults.clear(); return TSV_OK; } /// @brief Handy template /// @param clvl column level /// @param cidx column index /// @param op the kind of comparison to do (tsv_op_t) /// @param val value to search for /// @param flags how to order the results (tsv_orderby_t) /// @return tsv_error_t template int affx::TsvFile::findBegin_tmpl(int clvl,T1 cidx,int op,T2 val,int flags) { // discard the old findResultsClear(); // gotta be there int cidx_int=cname2cidx(clvl,cidx); if ((clvl<0)||(cidx_int<0)) { return (TSV_ERR_NOTFOUND); } // indexBuildMaybe(); // which index to use? 
TsvFileIndex* idx=index_matching_1(clvl,cidx_int,val); if (idx==NULL) { /// @todo if (m_optAutoIndex==true){index_column return (TSV_ERR_NOTFOUND); } // //if (idx->done==false) { //indexBuild(); //} // idx->results_append(m_findresults,op,val); // should we sort by line position? if ((flags&TSV_ORDERBY_LINE)!=0) { sort(m_findresults.begin(),m_findresults.end()); } // dont go to the first result -- findNext will do the movement. return TSV_OK; } ///! Crank out the different types we want to expose to the programmer #define TSV_DEFUN_FIND_BEGIN(Xtype1,Xtype2) \ int \ affx::TsvFile::findBegin(int clvl,Xtype1 cidx,int op,Xtype2 val,int flags) \ { \ return findBegin_tmpl(clvl,cidx,op,val,flags); \ } //! Big list of allowed types. TSV_DEFUN_FIND_BEGIN(int ,std::string); TSV_DEFUN_FIND_BEGIN(std::string,std::string); TSV_DEFUN_FIND_BEGIN(int ,int); TSV_DEFUN_FIND_BEGIN(std::string,int); TSV_DEFUN_FIND_BEGIN(int ,double); TSV_DEFUN_FIND_BEGIN(std::string,double); TSV_DEFUN_FIND_BEGIN(int ,unsigned int); TSV_DEFUN_FIND_BEGIN(std::string,unsigned int); TSV_DEFUN_FIND_BEGIN(int ,uint64_t); TSV_DEFUN_FIND_BEGIN(std::string,uint64_t); #undef TSV_DEFUN_FIND_BEGIN ////////// /// @brief Return the count of found lines. /// @return The number of matching lines to the last findBegin. unsigned int affx::TsvFile::findResultsCount() { return (int)m_findresults.size(); } /// @brief Goto the next result /// @return tsv_return_t int affx::TsvFile::findNext() { if (m_findresults_pos>=m_findresults.size()) { return TSV_FIND_LAST; } return gotoLine(m_findresults[m_findresults_pos++]); } ////////// /// @brief Define a TSV file from a string of field names /// @param definition /// @return tsv_return_t int affx::TsvFile::defineFile(const std::string& definition) { // clear out the old state clear(); // and define the new... return defineFileParse(definition); } /// @brief Overlay the definition on top of the current file. 
/// @param definition /// @return tsv_return_t /// @remarks defineFile uses "\n" to seperate levels, "\t" to seperate columns /// and "," to add options to the field name. /// at the moment there arent any options. int affx::TsvFile::defineFileParse(const std::string& definition) { std::vector split_line; std::vector split_col; std::vector split_coldef; splitstr(definition,'\n',split_line); for (unsigned int li=0;lim_key=="") { m_fileStream << TSV_STRING_COMMENT << h_ptr->m_value << m_optEndl; } // key=value else if (headerKeyLegal(h_ptr->m_key)==TSV_OK) { m_fileStream << TSV_STRING_META << h_ptr->m_key << "=" << h_ptr->m_value << m_optEndl; } } } /// @brief Write the headers for a given level /// @param clvl void affx::TsvFile::writeColumnHeaders_clvl(int clvl) { if ((0<=clvl)&&(clvl1) { return (TSV_ERR_FORMAT); } // set the personality m_optFieldSep='\t'; m_optDoQuote=false; m_optQuoteChar='"'; // tsv_return_t rv; if ((rv=writeOpen(filename))!=TSV_OK) { return rv; } writeKeyValHeaders(); if (getLevelCount()==1) { writeColumnHeaders_clvl(0); } flush(); return TSV_OK; } /// @brief Open the file and write the headers /// @param filename filename to open /// @return tsv_return_t /// @remarks Once open, set the values with "tsv->set(lvl,col,val)" and /// write them with "tsv->writeLevel(lvl)". /// If you want to define your own format, you can cobble it up from: /// writeOpen, setting the options, and the writeHeaders(). tsv_return_t affx::TsvFile::writeTsv_v2(const std::string& filename) { tsv_return_t rv; if ((rv=writeOpen(filename))!=TSV_OK) { return rv; } writeHeaders(); flush(); return TSV_OK; } /// @brief open the file for writing in "v2" format. 
/// @param filename /// @return tsv_error_t tsv_return_t affx::TsvFile::writeTsv(const std::string& filename) { // use v2 ("#%headerN=") when required if (getLevelCount()>0) { return writeTsv_v2(filename); } // otherwise v1 else { return writeTsv_v1(filename); } } /// @brief Set the options up for "csv" format and write the CSV header /// @param filename /// @return tsv_return_t affx::TsvFile::writeCsv(const std::string& filename) { // this file format only supports one level. if (getLevelCount()!=1) { return (TSV_ERR_FORMAT); } // set the personality m_optFieldSep=','; m_optDoQuote=true; m_optQuoteChar='"'; // tsv_return_t rv; if ((rv=writeOpen(filename))!=TSV_OK) { return rv; } // writeKeyValHeaders(); writeColumnHeaders_clvl(0); // flush(); return TSV_OK; } /// @brief Write a level of data to the file. /// @param clvl The level to write /// @return tsv_return_t int affx::TsvFile::writeLevel(int clvl) { if ((clvl<0)||(clvl>=(int)m_column_map.size())) { // no levels to be written. assert(0); return (TSV_ERR_NOTFOUND); } // indent for the current level writeFieldSep(clvl); size_t cidx_size=(int)m_column_map[clvl].size(); size_t cidx_size_1=cidx_size-1; int last_precision=-1; for (size_t cidx=0;cidxm_val_state==affx::VALSTATE_STRING) { if ((m_optEscapeOk==true)&&(m_optEscapeChar!=0)) { m_fileStream << escapeString(col->m_buffer,m_optEscapeChar); } else { m_fileStream << col->m_buffer; } } else if (col->m_val_state==affx::VALSTATE_DOUBLE) { // change in precision? Avoid doing this each time around. if (last_precision!=col->m_precision) { m_fileStream.setf(ios::fixed, ios::floatfield); m_fileStream.precision(col->m_precision); last_precision=col->m_precision; } m_fileStream << col->m_value_double; } else if (col->m_val_state==affx::VALSTATE_INT) { m_fileStream << col->m_value_int; } else { TSV_ERR_ABORT("writeLevel(): internal error. 
m_val_state="+ToStr(col->m_val_state)); } // if (m_optDoQuote==true) { m_fileStream<* vec, int optEscapeOk) { affx::TsvFile tsv; int col_idx; std::string tmp_str; vec->clear(); if (tsv.open(fileName)!=TSV_OK) { TSV_ERR_ABORT("extractColToVec: Cant open: '" +fileName+"'"); } if (optEscapeOk==0) { tsv.m_optEscapeOk=false; } col_idx=tsv.cname2cidx(0,colName); if (col_idx<0) { TSV_ERR_ABORT("extractColToVec: column '"+colName+"' not found in file '"+fileName+"'"); } while(tsv.nextLevel(0)==TSV_OK) { if (tsv.get(0,col_idx,tmp_str)!=TSV_OK) { TSV_ERR_ABORT("extractColToVec: Problem reading '"+colName+"' from '"+fileName+"'"); } vec->push_back(tmp_str); } tsv.close(); // return TSV_OK; } int affx::TsvFile::extractColToVec(const std::string& fileName, const std::string& colName, std::vector* vec) { int optEscapeOk=1; #ifdef WIN32 optEscapeOk=0; #endif return extractColToVec(fileName,colName,vec,optEscapeOk); } ////////// int affx::TsvFile::setPrecision(int p) { m_optPrecision=p; return TSV_OK; } ////////// int affx::TsvFile::printDuplicateHeaders() { header_iter_t iter,iter_last; int dup_cnt=0; iter=m_headers_bykey.begin(); // skip non key=val headers while ((iter!=m_headers_bykey.end())&&(iter->second->m_key=="")) { iter++; } if (iter==m_headers_bykey.end()) { return 0; } // walk the keys in order looking for dups while (1) { iter_last=iter; iter++; // done? if (iter==m_headers_bykey.end()) { break; } // a dup? if (iter->second->m_key==iter_last->second->m_key) { // print all the dup headers iter_last->second->print(); dup_cnt++; while (iter->second->m_key==iter_last->second->m_key) { iter->second->print(); dup_cnt++; iter++; } } } // return dup_cnt; } // affx::tsv_type_t TsvFile::stringToColType(const std::string& str) { if (str=="string") { return TSV_TYPE_STRING; } if (str=="int") { return TSV_TYPE_INT; } if (str=="float") { return TSV_TYPE_FLOAT; } if (str=="double") { return TSV_TYPE_DOUBLE; } // it is an error if we couldnt convert the string. 
return TSV_TYPE_ERR; } /// @brief Count the number of total lines, assuming a text file. /// @param filename name of file to open /// @return Line count. TSV_ERR_FILEIO, TSV_ERR_NOTFOUND on error. int TsvFile::getLineCountInFile(const std::string& filename, bool abortOnError) { if ( filename.empty() || !Fs::fileExists(filename) ) { if (abortOnError) { TSV_ERR_ABORT("affx::TsvFile::getLineCountInFile: file not found: '" + filename + "'."); } else { return 0; } } std::ifstream fstream; Fs::aptOpen(fstream, filename, std::ios_base::binary); // Check an error opening the file. if(!fstream.is_open() || !fstream.good()) { if (abortOnError) { TSV_ERR_ABORT("affx::TsvFile::getLineCountInFile: Could not open file: '" + filename + "' to read."); } else { return 0; } } int count = TSV_ERR_FILEIO; try { count = std::count(std::istreambuf_iterator(fstream), std::istreambuf_iterator(), '\n'); fstream.seekg(-1, std::ios_base::end); char last = '\0'; fstream.get(last); if ( last != '\n' ) { count++; } } catch (exception e) { if ( abortOnError) { TSV_ERR_ABORT(std::string("affx::TsvFile::getLineCountInFile: read error '")+strerror(errno)+"'."); } else { fstream.close(); return 0; } } fstream.close(); return count; } /// @brief Replace a character in a file with another. /// @param filename name of file for replacement /// @param a source character /// @param b replacement character /// @return TSV_OK or (TSV_ERR_FILEIO, TSV_ERR_NOTFOUND) on error. 
int TsvFile::replaceCharInFile(const std::string& filename, char a, char b, bool abortOnError ) { tsv_return_t rv = TSV_OK; if ( filename.empty() || !Fs::fileExists(filename) ) { if (abortOnError) { TSV_ERR_ABORT("affx::TsvFile::replace: file not found: '" + filename + "'."); } else { return TSV_ERR_NOTFOUND; } } std::string tmp_name = filename + "~"; // Aborts if permission denied if ( Fs::touch(tmp_name, abortOnError) != APT_OK ) { return TSV_ERR_FILEIO; } try { std::ifstream in_file(filename.c_str()); std::ofstream out_file(tmp_name.c_str()); std::istreambuf_iterator in(in_file); std::istreambuf_iterator eos; std::ostreambuf_iterator out(out_file); std::replace_copy(in, eos, out, a, b); } catch (exception e) { if ( abortOnError) { TSV_ERR_ABORT(std::string("affx::TsvFile::replace: error '")+strerror(errno)+"'."); } else { rv = TSV_ERR_FILEIO; } } if ( Fs::rm(filename, false) == APT_OK ) { if ( !Fs::fileRename(tmp_name, filename, false )) { rv = TSV_ERR_FILEIO; } } else{ Fs::rm(tmp_name, false); rv = TSV_ERR_FILEIO; } return rv; } affxparser/src/fusion/file/TsvFile/TsvFile.h0000644000175200017520000010203514516003651022051 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file TsvFile.h * @brief Headers for the TsvFile classes. * Read @link file-format-tsv the TsvFile format @endlink for an overview. */ #ifndef _TSVFILE_H_ #define _TSVFILE_H_ // this comes with the affy SDK #include "portability/affy-base-types.h" #include "util/Err.h" // #include #include #include #include #include #include #include /// The default number of decimal places in output #define TSV_DEFAULT_PRECISION 6 /// Access the rdbuf of the fstream directly. #define TSV_USE_RDBUF 1 ////////// namespace affx { class TsvFile; // The "#ifndef SWIG" is used to avoid exporting the // sections via swig. This keeps the interface small. #ifndef SWIG class TsvFileBinding; class TsvFileField; class TsvFileHeaderLine; class TsvFileIndex; #endif /// line numbers cant be negative typedef uint32_t linenum_t; const int TSV_INTERNSIZE_UNSET=-1; /// Selector for what is used to link the binding enum bindto_t { TSV_BINDTO_NONE = 0, ///< The binding wont be bound TSV_BINDTO_CIDX = 1, ///< Bound to column indexed by number TSV_BINDTO_CNAME = 2, ///< Bound to a column index by name }; enum valstate_t { VALSTATE_NONE=0, VALSTATE_INT=1, VALSTATE_DOUBLE=2, VALSTATE_STRING=3, }; enum tsv_type_t { TSV_TYPE_UNKNOWN = 0, TSV_TYPE_ERR = 1, TSV_TYPE_STRING = 2, TSV_TYPE_INT = 3, TSV_TYPE_FLOAT = 4, TSV_TYPE_DOUBLE = 5, }; /// Return values to signal Error codes enum tsv_return_t { TSV_OK = -1, ///< No Error TSV_LINE_BLANK = -2, ///< The line is blank (m_lineLvl) TSV_LINE_COMMENT = -3, ///< The line is a comment line (m_lineLvl) TSV_ERR_UNKNOWN = -11, ///< An otherwise unknown error TSV_ERR_NOTFOUND = -12, ///< The value was not found TSV_ERR_NULL = -13, ///< The value was null TSV_ERR_CONVERSION = -14, ///< 
The value could not be converted TSV_ERR_FILEIO = -15, ///< Some sort of file IO error TSV_ERR_FORMAT = -16, ///< The format of the file is bad TSV_ERR_EOF = -17, ///< Hit the end of the file // TSV_HEADER = -30, ///< A header line was read TSV_HEADER_LAST = -31, ///< Did not read a header line TSV_LEVEL_LAST = -32, ///< Did not read a line of the correct level TSV_FIND_LAST = -33, ///< Found the last matching value // TSV_ERR_BINARY = -40, ///< This is some sort of binary file. TSV_ERR_CALVIN = -41, ///< This is a calvin file. TSV_ERR_HDF5 = -42, ///< This is a HDF5 file. // TSV_LASTVALUE = -99, ///< end of errors }; /// Flags supplied to when finding matches enum tsv_op_t { TSV_OP_LT = 0x01, TSV_OP_LTEQ = 0x03, TSV_OP_EQ = 0x02, TSV_OP_GTEQ = 0x06, TSV_OP_GT = 0x04, }; /// The datatype of a index enum tsv_indexkind_t { TSV_INDEX_STRING = 0x1, TSV_INDEX_INT = 0x2, TSV_INDEX_DOUBLE = 0x3, TSV_INDEX_UINT = 0x4, TSV_INDEX_ULONGLONG = 0x5 }; /// Should the results be ordered by the line nubmer or value enum tsv_orderby_t { TSV_ORDERBY_LINE = 0x01, TSV_ORDERBY_VAL = 0x02 }; /// The format of the file. enum tsv_fileformat_t { TSV_FORMAT_NONE = 0x00, TSV_FORMAT_V1 = 0x01, TSV_FORMAT_V2 = 0x02, TSV_FORMAT_TABLE = 0x03, TSV_FORMAT_BAD = 0xff }; /// Requirements for binding. enum tsv_bindopt_t { TSV_BIND_OPTIONAL = 0x01, /// bind if present TSV_BIND_WARNING = 0x02, /// warn if not bound TSV_BIND_REQUIRED = 0x04, /// error if not bound TSV_BIND_NOFLAG = 0x00 /// nothing }; /// Flags for querying headers enum tsv_addflag_t { TSV_ADD_NONE = 0x00, /// just for nothing... 
TSV_ADD_KEYS = 0x01, /// return keys TSV_ADD_COMMENTS = 0x02, /// return comments TSV_ADD_ALL = (TSV_ADD_KEYS|TSV_ADD_COMMENTS) /// keys and comments }; /// misc optional flags enum tsv_optionflag_t { TSV_OPT_NONE = 0x00, TSV_OPT_CASESENSTIVE = 0x01, TSV_OPT_CASEINSENSTIVE = 0x02, }; // string functions void ltrim(std::string& str); void rtrim(std::string& str); void trim(std::string& str); // std::string tolower(const std::string& str); // void dequote(std::string& str); int splitstr(const std::string& str,char c,std::vector& vec); int countchars(const std::string& str,char c); // int unescapeChar(int c); int escapeChar(int c); std::string escapeString(const std::string& str,const char escapeChar); // bool header_ptr_less(const affx::TsvFileHeaderLine* a,const affx::TsvFileHeaderLine* b); }; ////////// #ifndef SWIG /// TsvFileBinding records the binding of a Column to a field. /// When "tsv->bind()" is called, a TsvFileBinding is created to record /// The details of the binding. When a file is opened, the bindings are /// linked to the correct Columns. class affx::TsvFileBinding { public: int m_bindto_clvl; ///< The column level affx::bindto_t m_bindto_type; ///< Is is bound by index or name? int m_bindto_cidx; ///< The index of the bound column std::string m_bindto_cname; ///< The name of the bound column tsv_bindopt_t m_flags; ///< What options are flagged? int m_interncache_size; ///< How many strings should be interned (reuse the storage) bool islinked; ///< Is this linked to a column? /// Only one of these should be set. /// The value of the column will be stored whereever they point. 
std::string* m_ptr_string; ///< pointer to std::string int* m_ptr_int; ///< pointer to int double* m_ptr_double; ///< pointer to double float* m_ptr_float; ///< pointer to float unsigned int* m_ptr_uint; ///< pointer to unsigned int uint64_t* m_ptr_ulonglong; ///< pointer to unsigned long long // TsvFileBinding(); ~TsvFileBinding(); // void clear(); std::string get_type_str(); }; ////////// /// TsvFileField holds the data read from a column. /// The data is read into a std::string buffer. /// Rather than resizing the buffer all the time, we try and /// fill it "in place". Most of the time the new value should be the same /// size as the old value. /// We store the clvl and cidx info in the Column as well. /// Saves having to search for it in the TsvFile object. /// /// "cidx" is counted from *after* the leading tabs. /// class affx::TsvFileField { public: int m_clvl; ///< The indent level of the column int m_cidx; ///< The column number of the column. std::string m_cname; ///< Column name. (Could be blank) tsv_type_t m_ctype; bool m_optAutoDequote; ///< Automatically strip quotes? bool m_optAutoTrim; ///< Automatically strip white space? std::string m_buffer; ///< Buffer space // copies of string for reuse int m_interncache_size; std::vector m_interncache_vec; int m_precision; ///< the output precision // bool m_isnull; ///< True if null bool m_ignore; ///< If this column should be ignored int m_val_state; //// Cached "int" conversions. Once converted we dont need to do it again int m_value_int; ///< The cached value bool m_value_int_done; ///< Has this value been converted? int m_value_int_rv; ///< The return value for the conversion //// int m_max_size; /// Cached "double" conversions. double m_value_double; ///< same as int bool m_value_double_done; ///< same as int int m_value_double_rv; ///< same as int /// Cached "unsigned int" conversions. 
unsigned int m_value_uint; ///< same as int bool m_value_uint_done; ///< same as int int m_value_uint_rv; ///< same as int /// Cached "uint64_t" conversions. uint64_t m_value_ulonglong; ///< same as int bool m_value_ulonglong_done; ///< same as int int m_value_ulonglong_rv; ///< same as int std::vector m_linkedvar_vec; ///< Linked bindings // TsvFileField(); ~TsvFileField(); // void init(); void clear(); bool isNull(); bool isEmpty(); // int setBuffer(const std::string& str); int setPrecision(int places); //int setInternCacheSize(int size); void convertToString(); std::string get_name(); affx::tsv_type_t get_type(); int get_max_size(); affx::tsv_type_t set_type(affx::tsv_type_t ctype); // get the value of the column int get(std::string* val); int get(short* val); int get(int* val); int get(double* val); int get(float* val); int get(unsigned int* val); int get(uint64_t* val); // #ifndef SWIG int get(std::vector* val,char sep=','); int get(std::vector* val,char sep=','); int get(std::vector* val,char sep=','); int get(std::vector* val,char sep=','); #endif // int set(const std::string& val); ///< set the val int set(int val); ///< set the val int set(short val); ///< set the val int set(double val); ///< set the val int set(unsigned int val); ///< set the val int set(uint64_t val); ///< set the val #ifndef SWIG int set(float val); ///< set the val #endif // #ifndef SWIG int set(const std::vector& val,char sep=','); int set(const std::vector& val,char sep=','); int set(const std::vector& val,char sep=','); int set(const std::vector& val,char sep=','); #endif /// void linkedvars_assign(affx::TsvFile* tsvfile); void linkedvar_push(affx::TsvFileBinding* var); void linkedvars_clear(); // std::string get_bound_types_str(); }; ////////// /// @brief TsvFileIndex maps values to line positions class affx::TsvFileIndex { public: /// Maybe we should inherit int m_bindto_clvl; ///< The column level affx::bindto_t m_bindto_type; ///< Is is bound by index or name? 
int m_bindto_cidx; ///< The index of the bound column std::string m_bindto_cname; ///< The name of the bound column int m_kind; ///< The datatype being indexed (string,int,double) int m_flags; ///< flags for the index (unused) bool m_done; ///< has the index been populated? // @todo Change this to something denser std::multimap m_index_str2line; ///< map of string value to lines std::multimap m_index_int2line; ///< map of int values to lines std::multimap m_index_double2line; ///< map of double values to lines std::multimap m_index_uint2line; ///< map of unsigned int values to lines std::multimap m_index_ulonglong2line; ///< map of uint64_t values to lines // TsvFileIndex(); ~TsvFileIndex(); // void init(); void clear(); void data_add(TsvFileField* field,linenum_t line); // int results_append(std::vector& results,int op,std::string val); int results_append(std::vector& results,int op,int val); int results_append(std::vector& results,int op,double val); int results_append(std::vector& results,int op,unsigned int val); int results_append(std::vector& results,int op,uint64_t val); // void dump(); // Templates are private to prevent accidental use. private: template int results_append_tmpl(std::vector& results,std::multimap& map,int op,T1& val); template void dump_map(std::multimap& map); }; ////////// /// This is a key-value pair with sort order. class affx::TsvFileHeaderLine { public: std::string m_key; std::string m_value; int m_order; ///< order in which this header should be put. /// normally equal to the line number) TsvFileHeaderLine(); TsvFileHeaderLine(const std::string& key,const std::string& value,int order); // bool operator<(const affx::TsvFileHeaderLine& b) const; void print(); }; ////////// #endif /// \brief A class for reading and writing Tab Seperated Value (TSV) files. /// See \link file-format-tsv the TsvFile format document \endlink /// and the example program \link tsv-example.cpp tsv-example.cpp \endlink . 
class affx::TsvFile { // typedef TsvFileHeaderLine header_t; typedef std::multimap::iterator header_iter_t; public: /// Options which can be set. bool m_optAllowDataComment; ///< allow comments mixed in with data? bool m_optAbortOnError; ///< ? bool m_optAutoColumns; ///< add more columns as needed. bool m_optAutoDequote; ///< remove quotes when reading? bool m_optAutoIndex; ///< ? bool m_optAutoSenseSep; ///< Autosense between tabs and commas bool m_optAutoTrim; ///< remove whitespace from value? bool m_optCheckFormatOnOpen; ///< Automatically check for binary files? bool m_optDoQuote; ///< Put quotes on output? char m_optEscapeChar; ///< the escape char to use bool m_optEscapeOk; ///< Obey the escapechar? bool m_optHasColumnHeader; ///< read the first line as column headers? bool m_optThrowOnError; ///< ? unsigned char m_optQuoteChar; ///< Quote char to use std::string m_optEndl; ///< End of line sequence to use unsigned char m_optFieldSep; ///< Field seperator defaults to TAB. bool m_optHdrDblQuoteComma; ///< Header contains '","'. bool m_optLinkVarsOnOpen; ///< Link variables when calling open() // char m_optQuoteChar1; ///< Quoting Character char m_optQuoteChar2; ///< Quoting Character // int m_optPrecision; ///< Default Precision int m_errno; ///< The error number bool m_eof; ///< at eof? int m_fileFormat; ///< What version of file was read? /// The filename being visited std::string m_fileName; /// Where the data begins (line 0) std::fstream::pos_type m_fileDataPos; /// Tag name of column header std::string m_headName; private: bool m_rawOpen; // marked private to force use of the accessors. 
int m_lineNum; ///< The current line number int m_lineLvl; ///< The level of the current line // A handle for IO operations std::fstream m_fileStream; //public: std::fstream::pos_type m_line_fpos; ///< Where the current line starts //private: /// std::vector m_headers_vec; bool m_headers_vec_packed; // where we are in the vector int m_headers_idx; header_t* m_headers_curptr; /// the key=val headers in string form. std::multimap m_headers_bykey; /// used for headersNext header_iter_t m_headers_iter; std::string m_headers_curkey_string; /// The columns by column level and column index std::vector > m_column_map; /// Map the columns names to column indexes std::vector > m_cnametocidx_map; /// The state of the bound and linked vars... bool m_linkvars_done; ///< Are the links up to date int m_linkvars_errors; ///< Count of link errors int m_linkvars_warnings; ///< Count of link warnings std::vector m_linkvars_vec; ///< Vector of bindings linked to us /// Have the indexing been done? bool m_index_done; /// A map of line numbers to file postions std::vector m_index_linefpos; /// A map of line numbers by level so we can find the parent lines std::vector > m_index_lineclvl; /// A vector of indexes which are defined. std::vector m_index_vec; /// A vector of lines which match the last "findBegin" std::vector m_findresults; /// The postition we are at in the returned results std::vector::size_type m_findresults_pos; public: // TsvFile(); TsvFile(const std::string& fname); ~TsvFile(); // Dont use the implicit copy or assigment... TsvFile(const TsvFile& that); #ifndef SWIG TsvFile& operator=(const TsvFile& that) { #ifdef _MSC_VER (that); /* unused var */ #endif Err::errAbort("TsvFile: Assigment of TsvFile not allowed."); return *this; }; #endif // int setError(int err); int getError(); int clearError(); // void setAbortOnError(bool value); // these are linking errors but called binding errors... 
int bindErrorCnt(); void flush(); int setFilename(const std::string& filename); /// \brief Get the filename of the TsvFile std::string getFileName() { return m_fileName; } int getLevelCount(); int getColumnCount(int clvl); static int getLineCountInFile(const std::string& filename, bool abortOnError = false); static int replaceCharInFile(const std::string &filename, char a, char b, bool abortOnError = true); /// \brief Opens a file -- attempts to guess some defaults int open(const std::string& filename); /// Open a Csv file int openCsv(const std::string& filename); /// Opens a file as a table int openTable(const std::string& filename); /// Close the file int close(); private: /// init a new object void init(); public: /// Closes the file and clears bindings and other info void clear(); void clearBindings(); void clearIndexes(); void clearFields(); void clearFieldsBelowClvl(int clvl_start); void clearColumnHeaders(); void clearHeaders(); int headerCount(); /// Resets the options to the defaults void default_options(); /// Check if file is opened int isFileOpen() { return (m_fileStream.is_open()? TSV_OK : TSV_ERR_FILEIO); } // same as the filestream bool is_open() { return m_fileStream.is_open(); } bool good() { return m_fileStream.good(); } // These are internal methods. 
int f_getline(std::string& line); bool f_lookingat_eol(); int f_read_header_v1(); int f_read_header_v2(); int f_read_headers(); int f_advance_tabs(); int f_advance_eol(); int f_read_column(affx::TsvFileField* col); int f_read_columns(int clvl); // void headers_to_fields_v2(); /// User methods int defineColumn(int clvl,int cidx,const std::string& cname); int defineColumn(int clvl,int cidx,const std::string& cname,tsv_type_t ctype); /// @todo // int defineColumn(int clvl,int cidx,const std::vector& cname,tsv_type_t ctype); // int defineFile(const std::string& definition); int defineFileParse(const std::string& definition); // tsv_return_t writeOpen(const std::string& filename); // tsv_return_t writeTsv(const std::string& filename); tsv_return_t writeTsv_v1(const std::string& filename); tsv_return_t writeTsv_v2(const std::string& filename); // tsv_return_t writeCsv(const std::string& filename); // int write_str(const std::string& str); // void writeHeaders(); void writeKeyValHeaders(); void writeColumnHeaders(); void writeColumnHeaders_clvl(int clvl); void writeFieldSep(int cnt); int writeLevel(int clvl); /// @todo should check for duplicate indexes and not make them int defineIndex(int clvl,const std::string& cname,int kind,int flags); int defineIndex(int clvl,int cidx ,int kind,int flags); // int getHeader(const std::string& key,std::string& val); int getHeader(const std::string& key,int& val); int getHeader(const std::string& key,double& val); int getHeader(const std::string& key,std::vector& val); int getHeaderAppend(const std::string& key,std::vector& val); // obtain the vector of header values whose header names contain the substring in key int getHeaderMatchingKeySubstr(const std::string& key,std::vector& val); int getHeaderMatchingKeySubstrAppend(const std::string& key,std::vector& val); // check to see if there is a header which is equal to val. // this is handy for checking chip types. 
int hasHeaderEqualTo(const std::string& key,const std::string& val); /// @todo maybe add a flag to "addHeader" to skip checking? int addHeader(const std::string& key,const std::string& val,int order); int addHeader(const std::string& key,const std::string& val); int addHeader(const std::string& key,int val); int addHeader(const std::string& key,double val); int addHeader(const std::string& key,const std::vector& val); // int addHeadersFrom(affx::TsvFile& f_tsv,int flags); int addHeadersFrom(affx::TsvFile& f_tsv,const std::string& prefix,int flags); int addHeadersFrom(affx::TsvFile& f_tsv,const std::string& prefix,std::vector& key_vec); int addHeader_nocheck(const std::string& key,const std::string& val,int order); int addHeader_nocheck(const std::string& key,const std::string& val); int headerKeyLegal(const std::string& key); int headersCount(); void headersBegin(); int headersNext(std::string& key,std::string& val); int headersFindNext(const std::string& key,std::string& val); // int deleteHeaders(const std::string& key); int deleteHeaders(const std::vector& keys); int deleteHeaders(const char** keys); // int printDuplicateHeaders(); #ifndef SWIG TsvFileHeaderLine* nextHeaderPtr(); int deleteHeaderPtr(TsvFileHeaderLine* hdrptr); #endif int addHeaderComment(const std::string& comment); int addHeaderComment(const std::string& comment,int order); int addHeaderComments(const std::vector& comments); // void repackHeaders(); void resortHeaders(); // int writeFileComment(const std::string& comment); // int cname2cidx(int clvl,int cidx); int cname2cidx(int clvl, const std::string& cname, tsv_optionflag_t options=TSV_OPT_NONE); // find the first column name which matches. 
int cname2cidx(int clvl, const std::string& alias1, const std::string& alias2, tsv_optionflag_t options=TSV_OPT_NONE); int cname2cidx(int clvl, const std::string& alias1, const std::string& alias2, const std::string& alias3, tsv_optionflag_t options=TSV_OPT_NONE); int cname2cidx(int clvl, const std::string& alias1, const std::string& alias2, const std::string& alias3, const std::string& alias4, tsv_optionflag_t options=TSV_OPT_NONE); #ifndef SWIG TsvFileField* clvlcidx2colptr(int clvl,int cidx); TsvFileField* clvlcidx2colptr(int clvl,const std::string& cname); #endif /// @todo rename this nicer... int cidx2cname(int clvl,int cidx,std::string& cname); std::string getColumnName(int clvl,int cidx); // set the default precision int setPrecision(int places); // set the precision on a column int setPrecision(int clvl,const std::string& cname,int places); int setPrecision(int clvl,int cidx,int places); // #ifndef SWIG TsvFileBinding* linkvar_alloc(); void linkvar_column(int clvl,int cidx,affx::TsvFileBinding* var); void linkvar_link(affx::TsvFileBinding* var); #endif int linkvars_makelinks(); int linkvars_maybe(); void linkvars_clear(); void linkvars_free(); // int formatOk(); #ifndef SWIG // void unbindAll(); int bind(int clvl,const std::string& name ,std::string* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,int cidx ,std::string* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,const std::string& name ,int* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,int cidx ,int* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,const std::string& name ,double* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,int cidx ,double* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int 
interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,const std::string& name ,float* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,int cidx ,float* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,const std::string& name ,unsigned int* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,int cidx ,unsigned int* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,const std::string& name ,uint64_t* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); int bind(int clvl,int cidx ,uint64_t* ptr,tsv_bindopt_t flags=TSV_BIND_OPTIONAL,int interncache_size=TSV_INTERNSIZE_UNSET); #endif // void printBindingErrors(); // //void setInternCacheSize(int clvl,int cidx,int size); //void setInternCacheSize(int clvl,std::string cname,int size); // info about the current line/row. unsigned int lineNumber(); linenum_t lineNum(); int lineLevel(); bool eof(); // movement commands. int nextLine(); int nextLevel(int clvl); int rewind(); int seekLine(linenum_t line); int gotoLine(linenum_t line); // Counts the total number of data lines in the file. // NOTE: rewinds the file back to the beginning. 
int countTotalDataLines(); // void currentLineAsString(std::string& line); // bool isNull(int clvl,int cidx); bool isNull(int clvl,const std::string& cname); // bool isEmpty(int clvl,int cidx); bool isEmpty(int clvl,const std::string& cname); // affx::tsv_type_t get_type(int clvl,const std::string& cidx); affx::tsv_type_t get_type(int clvl,int cidx); affx::tsv_type_t set_type(int clvl,const std::string& cidx,affx::tsv_type_t type); affx::tsv_type_t set_type(int clvl,int cidx,affx::tsv_type_t type); // int clear_max_sizes(); int deduce_types(); int deduce_sizes(); // Get a value given a clvl and cidx int get(int clvl,int cidx,std::string& val); int get(int clvl,int cidx,short& val); int get(int clvl,int cidx,int& val); int get(int clvl,int cidx,double& val); int get(int clvl,int cidx,float& val); int get(int clvl,int cidx,unsigned int& val); int get(int clvl,int cidx,uint64_t& val); // int get(int clvl,const std::string& cname,std::string& val); int get(int clvl,const std::string& cname,short& val); int get(int clvl,const std::string& cname,int& val); int get(int clvl,const std::string& cname,double& val); int get(int clvl,const std::string& cname,float& val); int get(int clvl,const std::string& cname,unsigned int& val); int get(int clvl,const std::string& cname,uint64_t& val); #ifndef SWIG // int get(int clvl,int cidx,std::vector* val,char sep=','); int get(int clvl,int cidx,std::vector* val,char sep=','); int get(int clvl,int cidx,std::vector* val,char sep=','); int get(int clvl,int cidx,std::vector* val,char sep=','); // int get(int clvl,const std::string& cname,std::vector* val,char sep=','); int get(int clvl,const std::string& cname,std::vector* val,char sep=','); int get(int clvl,const std::string& cname,std::vector* val,char sep=','); int get(int clvl,const std::string& cname,std::vector* val,char sep=','); #endif // int set(int clvl,int cidx,const std::string& val); //#ifndef SWIG int set(int clvl,int cidx,int val); int set(int clvl,int cidx,float val); int 
set(int clvl,int cidx,double val); #ifndef SWIG int set(int clvl,int cidx,unsigned int val); #endif int set(int clvl,int cidx,uint64_t val); //#endif // int set(int clvl,const std::string& cname,const std::string& val); int set(int clvl,const std::string& cname,int val); int set(int clvl,const std::string& cname,float val); int set(int clvl,const std::string& cname,double val); #ifndef SWIG int set(int clvl,const std::string& cname,unsigned int val); #endif int set(int clvl,const std::string& cname,uint64_t val); #ifndef SWIG // int set(int clvl,int cidx,const std::vector& val,char sep=','); int set(int clvl,int cidx,const std::vector& val,char sep=','); int set(int clvl,int cidx,const std::vector& val,char sep=','); int set(int clvl,int cidx,const std::vector& val,char sep=','); // int set(int clvl,const std::string& cname,const std::vector& val,char sep=','); int set(int clvl,const std::string& cname,const std::vector& val,char sep=','); int set(int clvl,const std::string& cname,const std::vector& val,char sep=','); int set(int clvl,const std::string& cname,const std::vector& val,char sep=','); #endif // #ifndef SWIG TsvFileIndex* index_alloc(); #endif int indexBuild(); int indexBuildMaybe(); #ifndef SWIG // TsvFileIndex* index_matching(int clvl,int cidx,int kind); // for use by the template TsvFileIndex* index_matching_1(int clvl,int cidx,std::string& val); TsvFileIndex* index_matching_1(int clvl,int cidx,int val); TsvFileIndex* index_matching_1(int clvl,int cidx,double val); TsvFileIndex* index_matching_1(int clvl,int cidx,unsigned int val); TsvFileIndex* index_matching_1(int clvl,int cidx,uint64_t val); #endif std::fstream::pos_type line_fpos(); /// The error messages which one sees are nasty when using templates. 
/// This is the base template, make it private private: template int findBegin_tmpl(int clvl,T1 cidx,int op,T2 val,int flags); public: /// These are instances of the above template /// The user will get a better error message int findBegin(int clvl,std::string cname,int op,std::string val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,int cidx ,int op,std::string val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,std::string cname,int op,int val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,int cidx ,int op,int val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,std::string cname,int op,double val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,int cidx ,int op,double val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types #ifndef SWIG int findBegin(int clvl,std::string cname,int op,unsigned int val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,int cidx ,int op,unsigned int val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,std::string cname,int op,uint64_t val,int flags=TSV_ORDERBY_LINE); /// \brief like findBegin with different types int findBegin(int clvl,int cidx ,int op,uint64_t val,int flags=TSV_ORDERBY_LINE); #endif // int findNext(); // unsigned int findResultsCount(); int findResultsClear(); #ifdef TSV_USE_RDBUF // This appears to be faster than calling ".get()" std::filebuf *m_rdbuf; #endif /// used for debugging void dump(); void dump_headers(); void dump_data(); void dump_bindings(); void dump_indexes(); /// void copyFormat(affx::TsvFile& f_tsv); int copyLevel(affx::TsvFile& f_tsv,int clvl); /// Return a pointer to ourself for swig. 
affx::TsvFile* tsv_ptr() { return this; }; /// put the column of a file into a vector. /// the column and file are required to exist or we die. static int extractColToVec(const std::string& fileName, const std::string& colName, std::vector* vec); static int extractColToVec(const std::string& fileName, const std::string& colName, std::vector* vec, int optEscapeOk); // static affx::tsv_type_t stringToColType(const std::string& str); }; ////////// #endif // _TSVFILE_H_ affxparser/src/fusion/file/TsvFile/TsvFileDiff.cpp0000644000175200017520000002441514516003651023202 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // affy/sdk/file/TsvFile/TsvFileDiff.cpp --- // // $Id: TsvFileDiff.cpp,v 1.3 2009-09-18 03:37:25 mspald Exp $ // // #include "file/TsvFile/TsvFileDiff.h" // #include "util/Err.h" #include "util/Guid.h" // #include // ////////// affx::TsvFileDiff::TsvFileDiff() { // //m_opt_print_linenums=true; m_opt_print_linenums=false; m_opt_print_same=true; // m_opt_do_headers=true; m_opt_do_data=true; // m_opt_print_max=0; m_opt_max_diff=0.0; m_opt_print_format=TsvFileDiff::FMT_2; // m_residuals_filename=""; m_residuals_tsv=NULL; // clear(); } affx::TsvFileDiff::~TsvFileDiff() { clear(); } void affx::TsvFileDiff::clear() { m_diff_cnt=0; m_print_cnt=0; close_residuals(); } ///// void affx::TsvFileDiff::open_residuals(const std::string& filename, affx::TsvFile& tsv1, affx::TsvFile& tsv2) { // no file if (filename=="") { return; } // m_residuals_tsv=new affx::TsvFile(); // for (int clvl=0;clvldefineColumn(clvl,cidx,cname); } } // m_residuals_tsv->addHeader("apt-file-type","tsv-diff-residuals"); m_residuals_tsv->addHeader("tsv-diff-input-file1",tsv1.getFileName()); m_residuals_tsv->addHeader("tsv-diff-input-file2",tsv2.getFileName()); m_residuals_tsv->addHeader("file-guid",affxutil::Guid::GenerateNewGuid()); // if (m_residuals_tsv->getLevelCount()==1) { m_residuals_tsv->writeTsv_v1(filename); } else { m_residuals_tsv->writeTsv_v2(filename); } } void affx::TsvFileDiff::close_residuals() { if (m_residuals_tsv!=NULL) { m_residuals_tsv->close(); delete m_residuals_tsv; m_residuals_tsv=NULL; } } ///// int affx::TsvFileDiff::readOptionsFrom(const std::string& filename) { affx::TsvFile tsv; if (tsv.open(filename)!=affx::TSV_OK) { Err::errAbort("Cant read TsvFileDiff options from '"+filename+"'"); } 
readOptionsFrom(tsv); tsv.close(); // return 0; } int affx::TsvFileDiff::readOptionsFrom(affx::TsvFile& tsv) { // put option processing here return 0; } ////////// #define P_LINENUMS_WIDTH "3" void affx::TsvFileDiff::p_linenums(affx::TsvFile& tsv1,affx::TsvFile& tsv2) { p_linenums(tsv1.lineNumber(),tsv2.lineNumber()); } void affx::TsvFileDiff::p_linenums(int l1,int l2) { // if (m_opt_print_linenums) { if (l1>=0) { printf("%" P_LINENUMS_WIDTH "d",l1); } else { printf("%" P_LINENUMS_WIDTH "s",""); } printf(":"); if (l2>=0) { printf("%" P_LINENUMS_WIDTH "d",l2); } else { printf("%" P_LINENUMS_WIDTH "s",""); } printf(" "); } } void affx::TsvFileDiff::p_line(affx::TsvFile& tsv) { int clvl=tsv.lineLevel(); int cidx_max=tsv.getColumnCount(clvl); std::string val; // for (int i=0;i file2 int affx::TsvFileDiff::diffHeaders(affx::TsvFile& tsv1, affx::TsvFile& tsv2) { int start_diff_cnt=m_diff_cnt; std::string key1,val1,key2,val2; // do the diff in three passes. // 1 = changed values tsv1.headersBegin(); while (tsv1.headersNext(key1,val1)==affx::TSV_OK) { if (tsv2.getHeader(key1,val2)==affx::TSV_OK) { if (val1==val2) { if (m_opt_print_same) { if (p_inc()) { printf(" #%%%s=%s\n",key1.c_str(),val1.c_str()); } } } else { // != m_diff_cnt++; if (p_inc()) { printf("-#%%%s=%s\n",key1.c_str(),val1.c_str()); printf("+#%%%s=%s\n",key1.c_str(),val2.c_str()); } } } } // 2 = headers in tsv1 not in tsv2 tsv1.headersBegin(); while (tsv1.headersNext(key1,val1)==affx::TSV_OK) { if (tsv2.getHeader(key1,val2)!=affx::TSV_OK) { m_diff_cnt++; if (p_inc()) { printf("-#%%%s=%s\n",key1.c_str(),val1.c_str()); } } } // 3 = headers in tsv2 not in tsv1 tsv2.headersBegin(); while (tsv2.headersNext(key2,val2)==affx::TSV_OK) { if (tsv1.getHeader(key2,val1)!=affx::TSV_OK) { m_diff_cnt++; if (p_inc()) { printf("+#%%%s=%s\n",key2.c_str(),val2.c_str()); } } } // return m_diff_cnt-start_diff_cnt; } int affx::TsvFileDiff::diffData(affx::TsvFile& tsv1, affx::TsvFile& tsv2) { int start_diff_cnt=m_diff_cnt; 
std::string val1,val2; // open_residuals(m_residuals_filename,tsv1,tsv2); // tsv1.rewind(); tsv2.rewind(); // tsv1.nextLine(); tsv2.nextLine(); // while ((!tsv1.eof())||(!tsv2.eof())) { // int tsv1_clvl=tsv1.lineLevel(); int tsv2_clvl=tsv2.lineLevel(); // while (((tsv1_clvl>tsv2_clvl)||tsv2.eof())&&(!tsv1.eof())) { if (p_inc()) { p_linenums(tsv1,tsv2); printf("-"); p_line(tsv1); } m_diff_cnt+=tsv1.getColumnCount(tsv1_clvl); tsv1.nextLine(); tsv1_clvl=tsv1.lineLevel(); } // while (((tsv2_clvl>tsv1_clvl)||tsv1.eof())&&(!tsv2.eof())) { if (p_inc()) { p_linenums(tsv1,tsv2); printf("+"); p_line(tsv2); } m_diff_cnt+=tsv2.getColumnCount(tsv2_clvl); tsv2.nextLine(); tsv2_clvl=tsv2.lineLevel(); } // if ((tsv1_clvl>=0)&&(tsv2_clvl>=0)) { int tsv1_cidx_max=tsv1.getColumnCount(tsv1_clvl); int tsv2_cidx_max=tsv2.getColumnCount(tsv2_clvl); // int cidx_max; if (tsv1_cidx_maxset(tsv1_clvl,cidx,"0"); } else { // put the value m_residuals_tsv->set(tsv1_clvl,cidx,val1); } } } // Not string equal -- check for numeric diff else if ((tsv1.get(tsv1_clvl,cidx,d1)==affx::TSV_OK) && (tsv2.get(tsv2_clvl,cidx,d2)==affx::TSV_OK)) { double d_diff=d1-d2; // output the result... if (m_residuals_tsv!=NULL) { m_residuals_tsv->set(tsv1_clvl,cidx,d_diff); } // if small, it isnt a diff if (!((m_opt_max_diff>0.0) && (fabs(d_diff)<=m_opt_max_diff))) { // too big -- mark it down as a diff. m_diff_cnt++; line_diff_cnt++; // } } // normal text diff else { if (m_residuals_tsv!=NULL) { m_residuals_tsv->set(tsv1_clvl,cidx,"'"+val1+"'/'"+val2+"'"); } } } // output the result line if (m_residuals_tsv!=NULL) { m_residuals_tsv->writeLevel(tsv1_clvl); } // print unchanged lines? 
if ((m_opt_print_same || (line_diff_cnt!=0)) && (p_inc())) { // if (line_diff_cnt==0) { p_linenums(tsv1,tsv2); printf(" "); p_line(tsv1); } else if (m_opt_print_format==TsvFileDiff::FMT_1) { p_diff_1line(tsv1,tsv2); } else if (m_opt_print_format==TsvFileDiff::FMT_2) { p_diff_2line(tsv1,tsv2); } else { Err::errAbort("Bad format"); } } } // tsv1.nextLine(); tsv2.nextLine(); } close_residuals(); return m_diff_cnt-start_diff_cnt; } // int affx::TsvFileDiff::diff(affx::TsvFile& tsv1, affx::TsvFile& tsv2) { int start_diff_cnt=m_diff_cnt; // if (m_opt_do_headers) { diffHeaders(tsv1,tsv2); } if (m_opt_do_data) { diffData(tsv1,tsv2); } // return m_diff_cnt-start_diff_cnt; } affxparser/src/fusion/file/TsvFile/TsvFileDiff.h0000644000175200017520000000441614516003651022646 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2008 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // affy/sdk/file/TsvFile/TsvFileDiff.h --- // // $Id: TsvFileDiff.h,v 1.3 2009-09-25 17:49:02 mspald Exp $ // #include "file/TsvFile/TsvFile.h" // #include #include // namespace affx { class TsvFileDiff; }; // class affx::TsvFileDiff { public: // bool m_opt_print_same; bool m_opt_print_linenums; int m_opt_print_format; int m_opt_print_max; // bool m_opt_do_headers; bool m_opt_do_data; double m_opt_max_diff; // int m_diff_cnt; int m_print_cnt; enum { FMT_1 = 1, FMT_2 = 2, }; // std::string m_residuals_filename; affx::TsvFile* m_residuals_tsv; // TsvFileDiff(); ~TsvFileDiff(); // void clear(); void open_residuals(const std::string& filename, affx::TsvFile& tsv1,affx::TsvFile& tsv2); void close_residuals(); // void p_linenums(affx::TsvFile& tsv1,affx::TsvFile& tsv2); void p_linenums(int lnum1,int lnum2); void p_line(affx::TsvFile& tsv); bool p_inc(); void p_diff_1line(affx::TsvFile& tsv1,affx::TsvFile& tsv2); void p_diff_2line(affx::TsvFile& tsv1,affx::TsvFile& tsv2); // int readOptionsFrom(const std::string& filename); int readOptionsFrom(affx::TsvFile& tsv); // int diff(const std::string& filename1,const std::string& filename2); int diff(affx::TsvFile& tsv1,affx::TsvFile& tsv2); int diffHeaders(affx::TsvFile& tsv1, affx::TsvFile& tsv2); int diffData(affx::TsvFile& tsv1, affx::TsvFile& tsv2); }; affxparser/src/fusion/file/TsvFile/TsvJoin.cpp0000644000175200017520000003162214516003651022427 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /// @file TsvJoin.cpp /// @brief Class for merging multiple tab separated text files. // #include "file/TsvFile/TsvJoin.h" // #include "util/Fs.h" #include "util/Guid.h" #include "util/Util.h" // #include // using namespace std; using namespace affx; void define_tsvjoin_options(PgOptions* opts) { opts->setUsage("apt-tsv-join - Merge multiple tab separated text files into a single file.\n" "Usage:\n" " apt-tsv-join -o data.txt -k column-name file1.txt file2.txt [...]"); opts->defineOption("k", "key", PgOpt::STRING_OPT, "Name of the column to use to join the various files.", ""); opts->defineOption("o", "out-file", PgOpt::STRING_OPT, "Output file to send the merged results to.", ""); opts->defineOption("p", "prepend-filename", PgOpt::BOOL_OPT, "Prepend the filename to the column header.", "false"); opts->defineOption("", "version", PgOpt::BOOL_OPT, "Display version information.", "false"); opts->defineOption("h", "help", PgOpt::BOOL_OPT, "Print help message.", "false"); } /** * @brief Constructor. * * @param argc Number of command line arguments. * @param argv Command line arguments. * @param version Version string. * * Errors: throw exception to display help messages, if too few input files. 
*/ tsvJoin::tsvJoin (int argc, const char* argv[], const std::string& version) : m_Version (version), m_CommentLine ("############################################################\n") { // Prefer throw() to exit(). Err::setThrowStatus (true); m_Opts = new PgOptions(); define_tsvjoin_options(m_Opts); m_Opts->parseArgv(argv); // Optionally display usage message. if (m_Opts->getBool("help") || argc == 1) { m_Opts->usage(); string msg = "version: " + version + "\n"; cout << msg; exit(0); } // Optionally display version. if (m_Opts->getBool("version")) { string msg = "version: " + version + "\n"; cout << msg; exit(0); } // Require key. m_Key = m_Opts->get("key"); if (m_Key.empty()) { string msg = "FATAL: Must provide --key option."; Err::errAbort (msg); } // Require at least two input files. m_FileCount = (unsigned int)m_Opts->getArgCount(); if (m_FileCount < 2) { string msg = "FATAL: Must provide 2 or more files to merge."; Err::errAbort (msg); } for (unsigned int i = 0; i < m_FileCount; ++i) m_FileNames.push_back (string (m_Opts->getArg(i))); // Require writeable output file. m_Outfile = m_Opts->get("out-file"); if (m_Outfile.empty()) { string msg = "FATAL: Must provide -out-file output file."; Err::errAbort (msg); } else Fs::mustOpenToWrite(m_Out, m_Outfile); // Save prepend status. m_Prepend = m_Opts->getBool("prepend-filename"); // The command line is copied to the output file header. assert (argc > 0); m_CommandLine = argv [0]; for (int i = 1; i < argc; ++i) m_CommandLine += " " + string (argv [i]); } /** * @brief Read, process data, write output. * * Errors: abort if unable to read any input file. */ void tsvJoin::run (void) { // Write initial output. beginOutput(); // Open input files. openInputFiles(); // Generate initial output header lines. beginOutputHeader(); // Write file specific header lines. 
unsigned int idx = 0; writeHeadersFromFile (m_StreamTsv, idx, m_StreamColNames); for (idx = 0; idx < m_FileCount - 1; ++idx) writeHeadersFromFile (*m_IndexTsvs[idx], idx + 1, m_IndexColNames[idx]); m_Out << m_CommentLine; // Write matching lines. writeMatches(); // Close output file. m_Out.close(); } /** * @brief Begin output. */ void tsvJoin::beginOutput (void) { Verbose::out (1, "MODULE: " + m_Version); Verbose::out (1, "CMD: " + m_CommandLine); m_ExecGuid = affxutil::Guid::GenerateNewGuid(); Verbose::out (1, "exec_guid " + m_ExecGuid); } /** * @brief Open input files. * * Errors: abort if a file could not be opened or if * the key column name was not found. */ void tsvJoin::openInputFiles (void) { string& streamFileName = m_FileNames[0]; Verbose::out (1, "Opening stream file " + streamFileName + "."); if (m_StreamTsv.open (streamFileName) != TSV_OK) Err::errAbort ("Problem opening file " + streamFileName); // Allow comment lines within data. m_StreamTsv.m_optAllowDataComment = true; bool foundKey = false; unsigned int colCount = m_StreamTsv.getColumnCount(0); m_StreamDataColCount = colCount - 1; m_StreamColNames.resize (colCount); for (unsigned int i = 0; i < colCount; ++i) { string colName; m_StreamTsv.cidx2cname (0, i, colName); m_StreamColNames[i] = colName; if (colName == m_Key) { m_StreamTsv.bind (0, i, &m_StreamKeyValue); foundKey = true; } } // Abort if key not found. if (! foundKey) Err::errAbort ("Key " + m_Key + " not found in file " + streamFileName); // Bind to data vector after checking column names - binds must follow // resize(), which may trigger reallocation, which would invalidate the // addresses passed in the bind. m_StreamData.resize (colCount - 1); unsigned int k = 0; for (unsigned int i = 0; i < colCount; ++i) { string colName; m_StreamTsv.cidx2cname (0, i, colName); if (colName != m_Key) m_StreamTsv.bind (0, i, &m_StreamData[k++]); } // Required in the constructor that at least two input file names were provided. 
assert (m_FileCount > 1); for (unsigned int i = 0; i < m_FileCount - 1; ++i) { string& indexFileName = m_FileNames[i + 1]; Verbose::out (1, "Opening file " + indexFileName + " for indexing."); TsvFile* indexTsv = new TsvFile; if (indexTsv->open (indexFileName) != TSV_OK) Err::errAbort ("Problem opening file " + indexFileName); indexTsv->m_optAllowDataComment = true; m_IndexTsvs.push_back (indexTsv); colCount = indexTsv->getColumnCount(0); m_IndexColNames.push_back (vector (colCount)); m_IndexData.push_back (vector (colCount - 1)); foundKey = false; k = 0; for (unsigned int j = 0; j < colCount; ++j) { string colName; indexTsv->cidx2cname (0, j, colName); m_IndexColNames[i][j] = colName; if (colName == m_Key) foundKey = true; } if (! foundKey) Err::errAbort ("Key " + m_Key + " not found in file " + indexFileName); // Set up index. indexTsv->defineIndex (0, m_Key, TSV_INDEX_STRING, 0); // Remember the number of data columns. m_IndexDataColCount.push_back (colCount - 1); } // end for (unsigned int i = 0; i < m_FileCount - 1; ++i) // Bind to the data vectors after setting up them up: push_back() // and resize() may cause reallocation, which will change the addresses. m_IndexKeyValues.resize (m_FileCount - 1); for (unsigned int i = 0; i < m_FileCount - 1; ++i) { TsvFile* indexTsv = m_IndexTsvs[i]; k = 0; // Add one since had stored number of data columns. unsigned int colCount = m_IndexDataColCount[i] + 1; for (unsigned int j = 0; j < colCount; ++j) { string colName; indexTsv->cidx2cname (0, j, colName); if (colName == m_Key) indexTsv->bind (0, j, &m_IndexKeyValues[i]); else indexTsv->bind (0, j, &m_IndexData[i][k++]); } } } /** * @brief Begin output file header. 
*/
// Writes five "#%key=value" lines: a freshly generated guid, the execution
// guid stored in m_ExecGuid, the version string, a time stamp and the
// command line.
void tsvJoin::beginOutputHeader (void)
{
  const string guid = affxutil::Guid::GenerateNewGuid();
  m_Out << "#%guid=" << guid << "\n";
  m_Out << "#%exec_guid=" << m_ExecGuid << "\n";
  m_Out << "#%exec_version=" << m_Version << "\n";
  m_Out << "#%create_date=" << Util::getTimeStamp() << "\n";
  m_Out << "#%cmd=" << m_CommandLine << "\n";
}

/**
 * @brief Write header lines copied from input file.
 *
 * Emits the separator line, then "#%file<idx>_name=<file name>",
 * "#%file<idx>_cols=<comma separated column names>" and one
 * "#%file<idx>_<key>=<value>" line per header of the input file that has a
 * non-empty key.
 *
 * @param tsv Tsv object whose headers are copied.
 * @param idx File index; selects m_FileNames[idx] and the line prefix.
 * @param columnNames Column names of the file. Side effect: every entry
 *        equal to the join key is erased from this vector.
 */
void tsvJoin::writeHeadersFromFile (TsvFile& tsv, const unsigned int idx, std::vector& columnNames)
{
  m_Out << m_CommentLine;
  headerLine (idx);
  m_Out << "name=" << m_FileNames[idx] << "\n";
  headerLine (idx);
  m_Out << "cols=";
  const unsigned int colCount = columnNames.size();
  // Need the key column at least.
  assert (colCount > 0);
  m_Out << columnNames[0];
  for (unsigned int i = 1; i < colCount; ++i)
    m_Out << "," << columnNames[i];
  m_Out << "\n";
  // Finished with the key column name - delete it.
  // writeColumnNames() relies on this for the index files.
  columnNames.erase (remove (columnNames.begin(), columnNames.end(), m_Key), columnNames.end());

  // Copy file header meta tags to output.
  string key, value;
  tsv.headersBegin();
  while (tsv.headersNext (key, value) == TSV_OK)
    // Comment lines among the headers give rise to a pair with a null key.
    if (! key.empty())
    {
      headerLine (idx);
      m_Out << key << "=" << value << "\n";
    }
}

/**
 * @brief Write output column names.
 *
 * One tab separated line: the key name, then the non-key column names of
 * the stream file, then the column names of each index file in file order.
 * When m_Prepend is set each non-key name is prefixed with
 * "<file name>: ".
 */
void tsvJoin::writeColumnNames (void)
{
  // Key first.
  m_Out << m_Key;
  // Remaining columns of the stream file.
  const unsigned int streamColCount = m_StreamColNames.size();
  for (unsigned int i = 0; i < streamColCount; ++i)
    // Write key only once.
    if (m_StreamColNames[i] != m_Key)
    {
      m_Out << "\t";
      // Prepend file name if requested.
      if (m_Prepend)
        m_Out << m_FileNames[0] << ": ";
      m_Out << m_StreamColNames[i];
    }
  // Remaining columns of index files.
  // The key column name has already been deleted, in writeHeadersFromFile().
  for (unsigned int idx = 0; idx < m_FileCount - 1; ++idx)
  {
    const unsigned int indexColCount = m_IndexColNames[idx].size();
    for (unsigned int i = 0; i < indexColCount; ++i)
    {
      m_Out << "\t";
      if (m_Prepend)
        m_Out << m_FileNames[idx + 1] << ": ";
      m_Out << m_IndexColNames[idx][i];
    }
  }
  m_Out << "\n";
}

/**
 * @brief Write matching lines.
 *
 * Reads the stream file row by row and looks the row's key up in every
 * index file. A row is written only when each index file reports exactly
 * one result; a row with no result in some index file is skipped, and more
 * than one result in an index file is a fatal error. The column name line
 * is written just before the first matching row, so an output without any
 * match has no column name line.
 */
void tsvJoin::writeMatches (void)
{
  Verbose::out (1, "Done indexing files. Generating merged file.");
  // Write column names only if find at least one match.
  bool columnNamesWritten = false;
  // Read stream file.
  while (m_StreamTsv.nextLevel (0) == TSV_OK)
  {
    bool foundMatch = false;
    // Look for matches in each index file.
    for (unsigned int idx = 0; idx < m_FileCount - 1; ++idx)
    {
      TsvFile* indexTsv = m_IndexTsvs[idx];
      if (indexTsv->findBegin (0, m_Key, TSV_OP_EQ, m_StreamKeyValue) != TSV_OK)
        Err::errAbort ("Problem reading index file " + m_FileNames[idx + 1]);
      const int resultCount = indexTsv->findResultsCount();
      // Require a match in all index files.
      if (resultCount == 0)
      {
        // Resets a "true" left over from an earlier index file of this row.
        foundMatch = false;
        break;
      }
      // Fatal error if more than one match was found.
      else if (resultCount > 1)
      {
        string msg = "FATAL: key '" + m_Key + "' is not a unique index. Duplicate key found, [";
        msg += m_StreamKeyValue + "] for file " + m_FileNames[idx + 1] + ".";
        Err::errAbort (msg);
      }
      // Found one match - read data
      // (presumably findNext() fills the variables bound in
      // openInputFiles() - confirm against TsvFile)
      if (indexTsv->findNext() != TSV_OK)
        Err::errAbort ("Problem reading index file " + m_FileNames[idx + 1]);
      foundMatch = true;
    } // end for (unsigned int idx = 0; idx < m_FileCount - 1; ++idx)
    if (foundMatch)
    {
      // Found match - write column names once.
      if (! columnNamesWritten)
      {
        writeColumnNames();
        columnNamesWritten = true;
      }
      // Write key.
      m_Out << m_StreamKeyValue;
      // Write columns from stream file.
      for (unsigned int i = 0; i < m_StreamDataColCount; ++i)
        m_Out << "\t" << m_StreamData[i];
      // Write columns from index files.
      for (unsigned int idx = 0; idx < m_FileCount - 1; ++idx)
      {
        for (unsigned int i = 0; i < m_IndexDataColCount[idx]; ++i)
          m_Out << "\t" << m_IndexData[idx][i];
      }
      m_Out << "\n";
    } // end if (foundMatch)
  } // end while (m_StreamTsv.nextLevel (0) == TSV_OK)
}

/** Destructor. Releases everything through clear(). */
tsvJoin::~tsvJoin()
{
  clear();
}

/**
 * Clear data: deletes the option parser, then closes and deletes every
 * index TsvFile.
 * NOTE(review): the pointers are neither reset nor removed from
 * m_IndexTsvs, so calling clear() a second time would delete them twice.
 * In this file it is only called from the destructor.
 */
void tsvJoin::clear()
{
  delete m_Opts;
  for (unsigned int i = 0; i < m_IndexTsvs.size(); ++i)
  {
    m_IndexTsvs[i]->close();
    delete m_IndexTsvs[i];
  }
}

// Writes the prefix "#%file<i>_" of a file related header line; the caller
// appends the rest of the line.
void tsvJoin::headerLine (const unsigned int i)
{
  m_Out << "#%file" << i << "_";
}
affxparser/src/fusion/file/TsvFile/TsvJoin.h0000644000175200017520000000736514516003651022103 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2006 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

/// @file TsvJoin.h
/// @brief Headers for TsvJoin.cpp.

#ifndef TSV_JOIN_H
#define TSV_JOIN_H

#include "file/TsvFile/TsvFile.h"
//
#include "util/Err.h"
#include "util/PgOptions.h"
#include "util/Util.h"
#include "util/Verbose.h"
//
// NOTE(review): the header names of the following five #include lines and
// the element types of the std::vector declarations below are missing in
// this copy of the file (angle-bracket text appears to have been
// stripped) - restore them from the upstream source.
#include
#include
#include
#include
#include
//

/// Joins several tab separated files on a key column: the first file is
/// read sequentially, the others are looked up through an index on the key.
class tsvJoin
{
public:
  /** Constructor.
   * @param argc Number of command line arguments.
   * @param argv Command line arguments.
   * @param version Version string.
   */
  tsvJoin (int argc, const char* argv[], const std::string& version);

  /** Destructor. */
  ~tsvJoin();

  /** Read, process data, write output. */
  void run();

private:
  /** Clear data. */
  void clear();

  /** Begin output. */
  void beginOutput (void);

  /** Open input files. */
  void openInputFiles (void);

  /** Begin output file header. */
  void beginOutputHeader (void);

  /** Write header lines copied from input file.
   * @param tsv Tsv object for file.
   * @param idx File index.
   * @param columnNames Column names; the key column name is erased from
   *        this vector by the call.
   */
  void writeHeadersFromFile (affx::TsvFile& tsv, const unsigned int idx, std::vector& columnNames);

  /** Write column names. */
  void writeColumnNames (void);

  /** Write matching lines. */
  void writeMatches (void);

  /** Write first part of file related header line.
   * @param i File index.
   */
  void headerLine (const unsigned int i);

  /// private data
  /// Version string.
  /// NOTE(review): this is a reference bound to the constructor's
  /// `version` argument, so the caller's string must outlive this object;
  /// passing a temporary would leave it dangling. Consider storing a copy.
  const std::string& m_Version;
  /// Command line options. Allocated with new in the constructor and
  /// deleted in clear().
  PgOptions* m_Opts;
  /// Command line as a string.
  std::string m_CommandLine;
  /// Key.
  std::string m_Key;
  /// Output file name.
  std::string m_Outfile;
  /// Should filenames be prepended?
  bool m_Prepend;
  /// Number of input files.
  unsigned int m_FileCount;
  /// Input file names. Entry 0 is the stream file, the rest are index files.
  std::vector m_FileNames;
  /// Output file stream.
  std::ofstream m_Out;
  /// Stream mode input file tsv object.
  affx::TsvFile m_StreamTsv;
  /// Stream mode input file column names.
  std::vector m_StreamColNames;
  /// Stream mode input file key value for row.
  std::string m_StreamKeyValue;
  /// Stream mode input file data values for row.
  std::vector m_StreamData;
  /// Index mode input file tsv objects. Heap allocated in
  /// openInputFiles(); closed and deleted in clear().
  std::vector m_IndexTsvs;
  /// Index mode input file key values.
  std::vector m_IndexKeyValues;
  /// Index mode input file column names.
  std::vector > m_IndexColNames;
  /// Index mode input file data values.
  std::vector > m_IndexData;
  /// Execution guid.
  std::string m_ExecGuid;
  /// Output section separator.
  const std::string m_CommentLine;
  /// Number of data columns in stream file.
  unsigned int m_StreamDataColCount;
  /// Number of data columns in index files.
  std::vector m_IndexDataColCount;
};

#endif /* TSV_JOIN_H */
affxparser/src/fusion/file/TsvFile/apt-dump-pgf.cpp0000644000175200017520000000272714516003651023340 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2006 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

/**
/// @file apt-dump-pgf.cpp
/// @brief Main for dumping pgf information.
*/

#include "file/TsvFile/DumpPgf.h"
#include "util/AptVersionInfo.h"
//

using namespace std;

// Entry point: builds a dumpPgf from the command line and runs it.
// Returns 0 on success and 1 when any exception is caught. A
// std::exception from the dumpPgf object is swallowed silently (see the
// comment in the handler); anything else is logged as unexpected.
int main (int argc,const char* argv[])
{
  try {
    const string version = AptVersionInfo::versionToReport();
    try
    {
      dumpPgf pgfDump (argv, version);
      pgfDump.run();
    }
    catch (exception& e)
    {
      // cerr << e.what() << endl;
      // errAbort() writes the error message.
      return 1;
    }
    return 0;
  }
  catch(...) {
    Verbose::out(1,"Unexpected Error: uncaught exception.");
    return 1;
  }
  // Not reached: every path above returns.
  return 1;
}
affxparser/src/fusion/file/TsvFile/apt-tsv-join.cpp0000644000175200017520000000275314516003651023371 0ustar00biocbuildbiocbuild////////////////////////////////////////////////////////////////
//
// Copyright (C) 2006 Affymetrix, Inc.
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** /// @file apt-tsv-join.cpp /// @brief Main for merging multiple tab separated text files. */ #include "file/TsvFile/TsvJoin.h" #include "util/AptVersionInfo.h" // using namespace std; int main(int argc, const char* argv[]) { try { const string version = AptVersionInfo::versionToReport(); try { tsvJoin join (argc, argv, version); join.run(); } catch (exception& e) { // Verbose::out (1, e.what()); // errAbort() writes the error message. return 1; } return 0; } catch(...) { Verbose::out(1,"Unexpected Error: uncaught exception."); return 1; } return 1; } affxparser/src/fusion/file/TsvFile/tsv-example.cpp0000644000175200017520000003124514516003651023301 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /* * \file tsv-example.cpp * \brief This program is provided as an example of using TsvFile. * You should be able to find a common uses of TsvFile here * to use as a starting point in your programs. * Read \link file-format-tsv the TsvFile docs \endlink more about TsvFile. */ // #include "file/TsvFile/ClfFile.h" #include "file/TsvFile/PgfFile.h" #include "file/TsvFile/TsvFile.h" // #include #include #include // // // #ifdef _MSC_VER #define snprintf _snprintf #endif // using namespace std; using namespace affx; /// When set, dont generate much output -- used when benchmarking the code int opt_benchmark; /// How much output to generate int opt_verbose; ///// Simple loop example (on a clf file) /// @brief Walk the contents of a clf file and count the entries /// @param file_name void example_walk_clf(std::string file_name) { printf("=== example walk clf ====================\n"); int rv; affx::ClfFile clf; clf.m_tsv.m_optAbortOnError=false; // this should be true in real code. if ((rv=clf.open(file_name))!=TSV_OK) { printf("open of '%s' failed! (rv=%d)\n",file_name.c_str(),rv); return; } int cnt_probe=0; while (clf.next_probe()==TSV_OK) { cnt_probe++; } printf("'%s' has %d probes\n",file_name.c_str(),cnt_probe); } ///// Nested loop example (on a pgf file) /// @brief Walk the contents of a PGF file and count the number of each items. /// @param file_name void example_walk_pgf(std::string file_name) { printf("=== example walk pgf ====================\n"); int rv; affx::PgfFile* pgf; pgf=new affx::PgfFile(); pgf->m_tsv.m_optAbortOnError=false; // this should be true in real code. if ((rv=pgf->open(file_name))!=TSV_OK) { printf("open of '%s' failed! 
(rv=%d)\n",file_name.c_str(),rv); delete pgf; return; } int cnt_probeset=0; int cnt_atoms=0; int cnt_probes=0; while (pgf->next_probeset()==TSV_OK) { cnt_probeset++; while (pgf->next_atom()==TSV_OK) { cnt_atoms++; while (pgf->next_probe()==TSV_OK) { cnt_probes++; } } } printf("read '%s' with %d probesets, %d atoms, %d probes.\n", file_name.c_str(),cnt_probeset,cnt_atoms,cnt_probes); pgf->close(); delete pgf; } ///// /// @brief Query the tsv file by gc_count and print out the matching probes. /// @param file_name Filename to search void example_index_1(std::string file_name) { printf("=== example index 1 ====================\n"); int rv; affx::TsvFile* tsv; int match_cnt=0; int match_print_max=20; // dont print more than this. std::string match_seq; int match_gc=0; int query_gc; const char* opstr; opstr="??"; // tsv=new affx::TsvFile(); if ((rv=tsv->open(file_name))!=TSV_OK) { printf("open of '%s' failed! (rv=%d)\n",file_name.c_str(),rv); delete tsv; return; } // we need an index tsv->defineIndex(2,"gc_count",TSV_INDEX_INT,0); // start the search tsv->findBegin(2,"gc_count",TSV_OP_LTEQ,query_gc=7,TSV_ORDERBY_VAL); opstr="<="; // tsv->findBegin(2,"gc_count",TSV_OP_GTEQ,query_gc=18,TSV_ORDERBY_VAL); opstr=">="; // tsv->findBegin(2,"gc_count",TSV_OP_EQ,query_gc=11,TSV_ORDERBY_VAL); opstr="=="; // tsv->dump_indexes(); // printf("searching for probes (gc_cnt %s %d) ==========\n",opstr,query_gc); printf("hit | line | seq | gc \n"); printf("----+---------+---------------------------+----\n"); while (tsv->findNext()==TSV_OK) { match_cnt++; tsv->get(2,"probe_sequence",match_seq); tsv->get(2,"gc_count",match_gc); if (opt_verbose>0) { if (match_cntlineNumber(),match_seq.c_str(),match_gc); } else if (match_cnt==match_print_max) { printf("first %d matches printed; skipping remaining matches...\n",match_print_max); } } } printf("=== %6d probes with gc_count %s %4d\n",match_cnt,opstr,query_gc); // tsv->close(); delete tsv; } /// @brief Search a pgf file for a matching sequence /// 
@param file_name void example_index_2(std::string file_name) { printf("=== example index 2 ====================\n"); affx::PgfFile pgf; std::string query_seq="GTTTCTTATACGCTTACTTCGACAA"; printf("searching for probes matching '%s'...\n",query_seq.c_str()); pgf.open(file_name); pgf.m_tsv.defineIndex(2,"probe_sequence",TSV_INDEX_STRING,0); pgf.m_tsv.findBegin(2,"probe_sequence",TSV_OP_EQ,query_seq); // printf("result count: %d \n",pgf.m_tsv.findResultsCount()); while (pgf.m_tsv.findNext()==TSV_OK) { if (opt_verbose>0) { printf("%5d : %10d : %-30s\n",pgf.m_tsv.lineNumber(),pgf.probe_id,pgf.probe_sequence.c_str()); } } } /// @brief Example of writing a custom TSV file. /// @param file_name file to write to void example_write_1(std::string file_name) { affx::TsvFile tsv; // two keys with different values. // Add all your keys before calling "writeTsv". tsv.addHeader("Example","This an example header. (1)"); tsv.addHeader("Example","This an example header. (2)"); // why? because "headerN" is a reserved key. tsv.addHeader("header0","this should not appear in the output"); // "header_whatever" isnt tsv.addHeader("header_test","this should appear in the output"); // The "%6.2" might be used in the future. // for now the format info is ignored. tsv.defineFile("col00\tcol01\tcol02\n" "col10\tcol11\tcol12\tcol13\n" "col20,string\tcol21,integer\tcol22,float,%6.2f\tcol23\tcol24\n"); tsv.writeTsv(file_name); // keys added after here are still added, but wont appear in the output... // ...the header has been written already. // Stuff data into the levels and columns and write it out. 
int cnt=0; int r0_reps=3; // count of topmost lines int rN_reps=6; // count of inner most lines for (int r0=0;r0at"); pgf.m_tsv.set(2,"gc_count" ,13); pgf.m_tsv.set(2,"probe_length" ,25); pgf.m_tsv.set(2,"interrogation_position",13); pgf.m_tsv.set(2,"probe_sequence" ,"ACAACGACCGTTCCGGAATCGACAT"); pgf.m_tsv.set(2,"exon_position" ,1703); pgf.m_tsv.writeLevel(2); // pgf.m_tsv.close(); } ///// /// @brief write v1 tsv and csv files /// @param file_name name of file /// @param fmt 1 for tsv, 2 for csv void example_write_3(std::string file_name,int fmt) { affx::TsvFile tsv; tsv.defineFile("col00\tcol01\tcol02\tcol03"); tsv.addHeader("Note","This is example output"); //tsv.dump_headers(); if (fmt==1) { tsv.writeTsv_v1(file_name); } else if (fmt==2) { tsv.writeCsv(file_name); } else { assert(0); } int cnt=0; for (int i=0;i<100;i++) { tsv.set(0,"col00",cnt++); tsv.set(0,"col01",cnt++); tsv.set(0,"col02",cnt++); tsv.set(0,"col03",cnt++); tsv.writeLevel(0); } tsv.close(); } /// @brief Example of writing a custom TSV file. /// @param file_name file to write to /// @param row_cnt number of rows to write /// @param col_cnt number of colss to write void example_write_4(std::string file_name,int row_cnt,int col_cnt) { affx::TsvFile tsv; char string_buf[100]; snprintf(string_buf,sizeof(string_buf), "'%s' %d rows, %d cols", file_name.c_str(),row_cnt,col_cnt); tsv.addHeader("Note",string_buf); for (int c=0;c0) { printf("\t"); } printf("%s",colstr.c_str()); cidx++; } printf("\n"); } tsv.close(); } /// @brief example of dumping columns /// @param file_name file to be read and dumped void example_dump_columns(std::string file_name) { affx::TsvFile tsv; int rv; printf("=== example column dump: %s ====================\n",file_name.c_str()); rv=tsv.openTable(file_name); assert(rv==TSV_OK); int clvl_max=tsv.getLevelCount(); for (int clvl=0;clvl // // using namespace std; using namespace affx; /// @brief Check that the tsv->trim methods work. 
void check_trim() { std::string trimstr1=" \t1 2 3\t "; std::string trimstr1s=" 1 2 3 "; std::string trimstr2; trimstr2=trimstr1; ltrim(trimstr2); //printf("ltrim: '%s'\n",trimstr2.c_str()); assert(trimstr2=="1 2 3\t "); trimstr2=trimstr1s; rtrim(trimstr2); //printf("rtrim: '%s'\n",trimstr2.c_str()); assert(trimstr2==" 1 2 3"); trimstr2=trimstr1; rtrim(trimstr2); //printf("rtrim: '%s'\n",trimstr2.c_str()); assert(trimstr2==" \t1 2 3"); trimstr2=trimstr1; trim(trimstr2); //printf("trim: '%s'\n",trimstr2.c_str()); assert(trimstr2=="1 2 3"); trimstr2="abc"; trim(trimstr2); //printf("trim: '%s'\n",trimstr2.c_str()); assert(trimstr2=="abc"); trimstr2=""; trim(trimstr2); //printf("trim: '%s'\n",trimstr2.c_str()); assert(trimstr2==""); } void check_tolower() { assert(affx::tolower("123")=="123"); assert(affx::tolower("123")!="456"); // assert(affx::tolower("abc")=="abc"); assert(affx::tolower("abc")!="ABC"); assert(affx::tolower("ABC")=="abc"); assert(affx::tolower("DEF")=="def"); } /// Check the dequoting of a string. #define CHECK_DEQUOTE(Xstr1,Xstr2) \ str1=Xstr1; \ str2=str1; \ dequote(str2); \ if (0) printf("dequote('%s')=='%s'\n",str1.c_str(),str2.c_str()); \ assert(str2==Xstr2); /// @brief Check that dequoting works as expected void check_dequote() { std::string str1; std::string str2; CHECK_DEQUOTE("abc","abc"); CHECK_DEQUOTE("'abc'","abc"); CHECK_DEQUOTE("'abc","'abc"); CHECK_DEQUOTE("abc'","abc'"); CHECK_DEQUOTE("''",""); CHECK_DEQUOTE("\"","\""); CHECK_DEQUOTE("\'","\'"); CHECK_DEQUOTE("\"abc\"","abc"); CHECK_DEQUOTE("\'abc\"","\'abc\""); CHECK_DEQUOTE("\"abc\'","\"abc\'"); } /// @brief Check splitstr works. 
void check_countchars() { assert(countchars("",'-')==0); assert(countchars("abc",'-')==0); assert(countchars("a-b-c",'-')==2); assert(countchars("---",'-')==3); } /// @brief print the results of splitstr /// @param strvec void check_splitstr_print(std::vector& strvec) { printf("#---\n"); for (unsigned int i=0;i strvec; std::string str; str="a-b-c"; // ("a","b","c") splitstr(str,'-',strvec); //check_splitstr_print(strvec); assert(strvec.size()==3); str=""; // ("") splitstr(str,'-',strvec); //check_splitstr_print(strvec); assert(strvec.size()==1); str="-"; // ("","") splitstr(str,'-',strvec); //check_splitstr_print(strvec); assert(strvec.size()==2); } void check_escapeString() { //printf("check_escapeString()\n"); // negative controls assert(escapeChar('A')==0); assert(escapeChar('Z')==0); assert(escapeChar('a')==0); assert(escapeChar('z')==0); // assert(escapeChar('\n')=='n'); assert(escapeChar('\r')=='r'); assert(escapeChar('#')=='#'); // assert(escapeString("A",'\\')=="A"); assert(escapeString("Z",'\\')=="Z"); assert(escapeString("",'\\')==""); assert(escapeString("abc",'\\')=="abc"); // postive assert(escapeString("#",'\\')=="\\#"); } /// @brief Check that the column name functions work. 
void check_field_name() { affx::TsvFile* tsv=new affx::TsvFile(); std::string cname; int cidx; cname="abc"; tsv->defineColumn(0,1,cname); cidx=tsv->cname2cidx(0,cname); //printf("0:%s=%d\n",cname.c_str(),cidx); assert(cidx==1); // check case senseitivty cidx=tsv->cname2cidx(0,"ABC"); assert(cidx==affx::TSV_ERR_NOTFOUND); cidx=tsv->cname2cidx(0,"ABC",affx::TSV_OPT_CASEINSENSTIVE); assert(cidx==1); cname="def"; tsv->defineColumn(0,10,cname); cidx=tsv->cname2cidx(0,cname); //printf("0:%s=%d\n",cname.c_str(),cidx); assert(cidx==10); // Make sure the alias finds it cidx=tsv->cname2cidx(0,"bogus1",cname); assert(cidx==10); cidx=tsv->cname2cidx(0,"bogus1","bogus2",cname); assert(cidx==10); cidx=tsv->cname2cidx(0,"bogus1","bogus2","bogus3",cname); assert(cidx==10); // cidx=tsv->cname2cidx(0,cname,"bogus1"); assert(cidx==10); cidx=tsv->cname2cidx(0,cname,"bogus1","bogus2"); assert(cidx==10); cidx=tsv->cname2cidx(0,cname,"bogus1","bogus2","bogus3"); assert(cidx==10); // cname="xyz"; cidx=tsv->cname2cidx(0,cname); assert(cidx==TSV_ERR_NOTFOUND); // Make sure the alias dont find it cidx=tsv->cname2cidx(0,"bogus1",cname); assert(cidx==TSV_ERR_NOTFOUND); cidx=tsv->cname2cidx(0,"bogus1","bogus2",cname); assert(cidx==TSV_ERR_NOTFOUND); cidx=tsv->cname2cidx(0,"bogus1","bogus2","bogus3",cname); assert(cidx==TSV_ERR_NOTFOUND); // delete tsv; } void check_empty() { affx::TsvFileField col; // col.clear(); assert(col.isNull()==true); assert(col.isEmpty()==true); // col.setBuffer(""); assert(col.isNull()==false); assert(col.isEmpty()==true); // col.setBuffer("abc"); assert(col.isNull()==false); assert(col.isEmpty()==false); } /// @brief Check the conversion of a field buffer void check_field_convert() { affx::TsvFileField col; double val_double; int val_int; short val_short; int rv; // a good int conversion col.clear(); col.setBuffer("123"); rv=col.get(&val_int); assert((rv==TSV_OK)&&(val_int==123)); rv=col.get(&val_int); assert((rv==TSV_OK)&&(val_int==123)); // a bad int conversion 
col.clear(); col.setBuffer("abc"); rv=col.get(&val_int); assert((rv==TSV_ERR_CONVERSION)&&(val_int==-1)); rv=col.get(&val_int); assert((rv==TSV_ERR_CONVERSION)&&(val_int==-1)); // a bad int conversion col.clear(); col.setBuffer("123.4"); rv=col.get(&val_int); assert((rv==TSV_ERR_CONVERSION)&&(val_int==-1)); rv=col.get(&val_int); assert((rv==TSV_ERR_CONVERSION)&&(val_int==-1)); // good short col.clear(); col.setBuffer("456"); rv=col.get(&val_short); assert((rv==TSV_OK)&&(val_short==456)); rv=col.get(&val_short); assert((rv==TSV_OK)&&(val_short==456)); // bad short col.clear(); col.setBuffer("123456"); rv=col.get(&val_short); assert((rv==TSV_ERR_CONVERSION)&&(val_short==-1)); rv=col.get(&val_short); assert((rv==TSV_ERR_CONVERSION)&&(val_short==-1)); // bad short but still a good int. rv=col.get(&val_int); assert((rv==TSV_OK)&&(val_int==123456)); // a good double conversion col.clear(); col.setBuffer("123.5"); rv=col.get(&val_double); assert((rv==TSV_OK)&&(val_double==123.5)); rv=col.get(&val_double); assert((rv==TSV_OK)&&(val_double==123.5)); // exponent col.clear(); col.setBuffer("123.5e6"); rv=col.get(&val_double); assert((rv==TSV_OK)&&(val_double==123.5e6)); rv=col.get(&val_double); assert((rv==TSV_OK)&&(val_double==123.5e6)); // a bad double conversion col.clear(); col.setBuffer("abc"); rv=col.get(&val_double); assert((rv==TSV_ERR_CONVERSION)&&(val_double==-1)); rv=col.get(&val_double); assert((rv==TSV_ERR_CONVERSION)&&(val_double==-1)); // bad partial col.setBuffer("123abc"); rv=col.get(&val_double); assert((rv==TSV_ERR_CONVERSION)&&(val_double==-1)); rv=col.get(&val_double); assert((rv==TSV_ERR_CONVERSION)&&(val_double==-1)); /* for (int p=0;p<5;p++) { col.clear(); col.setPrecision(p); col.set(-1.0); std::string buf; col.get(&buf); printf("%d: col val='%s'\n",p,buf.c_str()); } */ } void check_field_vec_convert() { affx::TsvFileField col; int rv; #define TEST_VEC_CONVERT_123(TYPE) do { \ std::vector out_vec_##TYPE; \ rv=col.get(&out_vec_##TYPE,','); \ 
assert((rv==TSV_OK)&&(out_vec_##TYPE.size()==3)&& \ (out_vec_##TYPE[0]==123)&&(out_vec_##TYPE[2]==789)); \ } while(0) // a good int conversion col.clear(); col.setBuffer("123,456,789"); // TEST_VEC_CONVERT_123(int); TEST_VEC_CONVERT_123(float); TEST_VEC_CONVERT_123(double); col.setBuffer(" 123 , 456 , 789 "); TEST_VEC_CONVERT_123(int); TEST_VEC_CONVERT_123(float); TEST_VEC_CONVERT_123(double); col.setBuffer(" 123 ,, 789 "); TEST_VEC_CONVERT_123(int); TEST_VEC_CONVERT_123(float); TEST_VEC_CONVERT_123(double); col.setBuffer(" 123 , , 789 "); TEST_VEC_CONVERT_123(int); TEST_VEC_CONVERT_123(float); TEST_VEC_CONVERT_123(double); #undef TEST_VEC_CONVERT_123 // std::vector out_vec_string; col.setBuffer("abc,def,ghi"); rv=col.get(&out_vec_string); assert((rv==TSV_OK)&& (out_vec_string.size()==3)&& (out_vec_string[0]=="abc")&& (out_vec_string[1]=="def")&& (out_vec_string[2]=="ghi")); // col.setBuffer(""); rv=col.get(&out_vec_string); assert((rv==TSV_OK)&& (out_vec_string.size()==0)); col.setBuffer(","); rv=col.get(&out_vec_string); assert((rv==TSV_OK)&& (out_vec_string.size()==2)&& (out_vec_string[0]=="")&& (out_vec_string[1]=="")); // col.setBuffer(""); #define TEST_VEC_CONVERT_BLANK(TYPE) do { \ std::vector out_vec_##TYPE; \ rv=col.get(&out_vec_##TYPE,','); \ assert((rv==TSV_OK)&&(out_vec_##TYPE.size()==0)); \ } while (0) TEST_VEC_CONVERT_BLANK(int); TEST_VEC_CONVERT_BLANK(float); TEST_VEC_CONVERT_BLANK(double); #undef TEST_VEC_CONVERT_BLANK std::vector in_vec_int_0; // col.set("abc"); // in_vec_int_0.clear(); col.set(in_vec_int_0,','); assert(col.m_buffer==""); std::vector in_vec_int_1(1,123); col.set(in_vec_int_1,','); assert(col.m_buffer=="123"); std::vector in_vec_int_2(2,123); col.set(in_vec_int_2,','); assert(col.m_buffer=="123,123"); // std::vector in_vec_str_1; col.set(in_vec_str_1,':'); assert(col.m_buffer==""); // in_vec_str_1.push_back("abc"); col.set(in_vec_str_1,':'); assert(col.m_buffer=="abc"); // in_vec_str_1.push_back("def"); 
in_vec_str_1.push_back("ghi"); // col.set(in_vec_str_1); assert(col.m_buffer=="abc,def,ghi"); col.set(in_vec_str_1,':'); assert(col.m_buffer=="abc:def:ghi"); // std::vector in_vec_d_1; col.setPrecision(6); col.set(in_vec_d_1,':'); assert(col.m_buffer==""); // in_vec_d_1.push_back(1.0); col.set(in_vec_d_1); //printf("### m_buffer='%s'\n",col.m_buffer.c_str()); assert(col.m_buffer=="1.000000"); // in_vec_d_1.push_back(2.0000091); in_vec_d_1.push_back(3.0000011); // col.set(in_vec_d_1); //printf("### m_buffer='%s'\n",col.m_buffer.c_str()); assert(col.m_buffer=="1.000000,2.000009,3.000001"); } /// @brief Check that the headers work void check_headers_1() { affx::TsvFile tsv; std::string key; std::string val; // should be empty at start tsv.headersBegin(); while (tsv.headersNext(key,val)==TSV_OK) { assert(0); } // add one and get the value. std::string test_str_in="test_val"; std::string test_str_out; tsv.addHeader("test_str_1",test_str_in); tsv.getHeader("test_str_1",test_str_out); assert(test_str_out==test_str_in); // should fail. 
assert(tsv.getHeader("test_str_none",test_str_out)!=TSV_OK); // test multiple exact matches returned in a vector std::vector test_vec_out; std::string test_str_in_2="test_val_2"; tsv.addHeader("test_str_2",test_str_in_2); tsv.getHeader("test_str_2",test_vec_out); assert(test_vec_out.size()==1 && test_vec_out[0]==test_str_in_2); assert(tsv.getHeader("test_str_none",test_vec_out)!=TSV_OK); assert(test_vec_out.size()==0); tsv.getHeader("test_str_1",test_vec_out); assert(test_vec_out.size()==1 && test_vec_out[0]==test_str_in); tsv.getHeaderAppend("test_str_2",test_vec_out); assert(test_vec_out.size()==2 && test_vec_out[1]==test_str_in_2); assert(tsv.getHeaderAppend("test_str_none",test_vec_out)!=TSV_OK); assert(test_vec_out.size()==2); tsv.getHeaderAppend("test_str_1",test_vec_out); assert(test_vec_out.size()==3 && test_vec_out[2]==test_str_in); // test multiple exact matches returned in a vector tsv.getHeaderMatchingKeySubstr("test_str_2",test_vec_out); assert(test_vec_out.size()==1 && test_vec_out[0]==test_str_in_2); assert(tsv.getHeaderMatchingKeySubstr("test_str_none",test_vec_out)!=TSV_OK); assert(test_vec_out.size()==0); tsv.getHeaderMatchingKeySubstr("test_str_1",test_vec_out); assert(test_vec_out.size()==1 && test_vec_out[0]==test_str_in); tsv.getHeaderMatchingKeySubstrAppend("test_str_2",test_vec_out); assert(test_vec_out.size()==2 && test_vec_out[1]==test_str_in_2); assert(tsv.getHeaderMatchingKeySubstrAppend("test_str_none",test_vec_out)!=TSV_OK); assert(test_vec_out.size()==2); tsv.getHeaderMatchingKeySubstrAppend("test_str_1",test_vec_out); assert(test_vec_out.size()==3 && test_vec_out[2]==test_str_in); // test multiple partial matches returned in a vector tsv.getHeaderMatchingKeySubstr("test",test_vec_out); assert(test_vec_out.size()==2); assert(tsv.getHeaderMatchingKeySubstr("test_str_none",test_vec_out)!=TSV_OK); assert(test_vec_out.size()==0); tsv.getHeaderMatchingKeySubstr("str_1",test_vec_out); assert(test_vec_out.size()==1 && 
test_vec_out[0]==test_str_in); tsv.getHeaderMatchingKeySubstrAppend("test",test_vec_out); assert(test_vec_out.size()==3); assert(tsv.getHeaderMatchingKeySubstrAppend("test_str_none",test_vec_out)!=TSV_OK); assert(test_vec_out.size()==3); tsv.getHeaderMatchingKeySubstrAppend("str_1",test_vec_out); assert(test_vec_out.size()==4 && test_vec_out[3]==test_str_in); int test_int_in=123; int test_int_out=0; tsv.addHeader("test_int_1",test_int_in); tsv.getHeader("test_int_1",test_int_out); assert(test_int_out==test_int_in); // assert(tsv.getHeader("test_int_none",test_int_out)!=TSV_OK); double test_double_in=123.456; double test_double_out=0; tsv.addHeader("test_double_1",test_double_in); tsv.getHeader("test_double_1",test_double_out); assert(test_double_out==test_double_in); // should fail assert(tsv.getHeader("test_double_none",test_double_out)!=TSV_OK); // They should remain in the order added. for (int i=0;i<10;i++) { tsv.addHeader("a",i); tsv.addHeader("b",i); tsv.addHeader("c",i); tsv.addHeaderComment("comment"); } //tsv.dump_headers(); // assert(tsv.hasHeaderEqualTo("a","1")==TSV_OK); assert(tsv.hasHeaderEqualTo("c","1")==TSV_OK); // assert(tsv.hasHeaderEqualTo("a","99")==TSV_ERR_NOTFOUND); assert(tsv.hasHeaderEqualTo("d","1")==TSV_ERR_NOTFOUND); // tsv.headersBegin(); for (int i=0;i<10;i++) { tsv.headersFindNext("b",val); assert(i==strtol(val.c_str(),NULL,0)); } // no header returned by headersNext should be blank tsv.headersBegin(); while (tsv.headersNext(key,val)==TSV_OK) { //printf("#%s=%s\n",key.c_str(),val.c_str()); assert(key!=""); } } ////////// /// @brief Check table reading void check_read_table_1() { affx::TsvFile tsv; int val_ref=0; int val_col; tsv.openTable("data-table-1.txt"); assert(tsv.countTotalDataLines()==10); while (tsv.nextLevel(0)==TSV_OK) { int cidx=0; //printf("num=%3d: lvl=%3d:",tsv.lineNumber(),tsv.lineLevel()); while ((cidxm_value.c_str()); hdr_cnt++; if ((hdr->m_key=="") && (hdr->m_value==comment)) { hdr_found++; } } // 
assert((hdr_cnt==1)&&(hdr_found==1)); // tsv.close(); } #define CH2_CNT 10 /// @brief Check the behavior of headers void check_headers_3() { affx::TsvFile tsv; int rv; for (int i=0;i %2d\n",key,file_key,file_val); assert(file_key==key); assert(file_val==(101*key)); } assert(num_found==1); } tsv.close(); } /// @brief Check that we can read a windows path name void check_winpath_1() { int rv; std::string pathname; affx::TsvFile tsv; rv=tsv.open("./data-test-winpath.txt"); assert(rv==TSV_OK); tsv.m_optEscapeOk=false; tsv.nextLevel(0); rv=tsv.get(0,"pathname",pathname); assert(rv==TSV_OK); //printf("pathname='%s'\n",pathname.c_str()); assert(pathname=="C:\\some\\path\\to\\a\\file.txt"); tsv.close(); } ////////// void check_read_vec_1() { int rv; std::vector vec_i; affx::TsvFile tsv; rv=tsv.open("./data-test-4.tsv"); assert(rv==TSV_OK); tsv.nextLevel(0); rv=tsv.get(0,"col_00",&vec_i); assert((rv==TSV_OK)&&(vec_i.size()==3)&&(vec_i[0]==1)&&(vec_i[2]==3)); rv=tsv.get(0,"col_01",&vec_i); assert((rv==TSV_OK)&&(vec_i.size()==3)&&(vec_i[0]==4)&&(vec_i[2]==6)); tsv.nextLevel(0); rv=tsv.get(0,"col_00",&vec_i); assert((rv==TSV_OK)&&(vec_i.size()==3)&&(vec_i[0]==7)&&(vec_i[2]==9)); rv=tsv.get(0,"col_01",&vec_i,':'); assert((rv==TSV_OK)&&(vec_i.size()==3)&&(vec_i[0]==10)&&(vec_i[2]==12)); tsv.nextLevel(0); std::string tmp_str; std::vector tmp_vec_d; rv=tsv.get(0,0,tmp_str); assert((rv==TSV_OK)&&(tmp_str=="7.005,8.123,9.100")); tsv.get(0,0,&tmp_vec_d); assert((rv==TSV_OK)&&(tmp_vec_d.size()==3)&& (tmp_vec_d[0]==7.005)&&(tmp_vec_d[1]==8.123)&&(tmp_vec_d[2]==9.100));; rv=tsv.get(0,"col_01",tmp_str); assert((rv==TSV_OK)&&(tmp_str=="7.005005:8.123456:9.100000")); rv=tsv.get(0,"col_01",&tmp_vec_d,':'); assert((rv==TSV_OK)&&(tmp_vec_d.size()==3)&& (tmp_vec_d[0]==7.005005)&&(tmp_vec_d[1]==8.123456)&&(tmp_vec_d[2]==9.100000));; tsv.close(); } void check_write_10() { affx::TsvFile tsv; tsv.defineColumn(0,0,"col_00"); tsv.writeTsv_v1("./data-test-10.out"); for (int i=0;i<10;i++) { 
tsv.set(0,0,i); tsv.writeLevel(0); // if you are debugging, you might want to explictly flush the output. // it is not required. Be warned that this slows down the IO. tsv.flush(); } tsv.close(); } void check_write_vec_1() { int rv; affx::TsvFile tsv; tsv.defineColumn(0,0,"col_00"); tsv.defineColumn(0,1,"col_01"); rv=tsv.writeTsv_v1("./data-test-4.out"); // std::vector vec_i; // vec_i.clear(); vec_i.push_back(1); vec_i.push_back(2); vec_i.push_back(3); tsv.set(0,0,vec_i); vec_i.clear(); vec_i.push_back(4); vec_i.push_back(5); vec_i.push_back(6); tsv.set(0,"col_01",vec_i); // tsv.writeLevel(0); // vec_i.clear(); vec_i.push_back(7); vec_i.push_back(8); vec_i.push_back(9); tsv.set(0,0,vec_i); vec_i.clear(); vec_i.push_back(10); vec_i.push_back(11); vec_i.push_back(12); tsv.set(0,"col_01",vec_i,':'); // tsv.writeLevel(0); // std::vector vec_d; vec_d.push_back(7.0050051); vec_d.push_back(8.123456); vec_d.push_back(9.1); // tsv.setPrecision(0,0,3); tsv.set(0,0,vec_d); // tsv.setPrecision(0,"col_01",6); tsv.set(0,"col_01",vec_d,':'); // tsv.writeLevel(0); // tsv.close(); } void check_read_colvec_1() { std::vector colvec; affx::TsvFile::extractColToVec("data-test-3.tsv","val",&colvec,0); // if (0) { for (int i=0;i #include #include #include // #ifdef WIN32 #define SNPRINTF _snprintf #else #define SNPRINTF snprintf #endif ////////// /// @brief Count the datalines in the tsvfile fname. /// @param fileName filename of the tsvFile void tsv_util_linecount(const std::string& fileName) { int rv; int data_lines; affx::TsvFile* tsv; tsv=new affx::TsvFile(); rv=tsv->open(fileName); printf("%-40s: ",fileName.c_str()); if (rv!=affx::TSV_OK) { printf("ERR=%d\n",rv); delete tsv; return; } std::string str; data_lines=0; while (tsv->nextLine()==affx::TSV_OK) { data_lines++; //tsv->get(0,0,str); //printf("%s\n",str.c_str()); } printf("datalines=%d\n",data_lines); tsv->close(); delete tsv; } ////////// /// @brief Dump the just the headers of the TsvFile to stdout. 
/// @param fileName filename fo the tsvFile void tsv_util_headers(const std::string& fileName) { int rv; affx::TsvFile* tsv; std::string key,val; tsv=new affx::TsvFile(); rv=tsv->open(fileName); if (rv!=affx::TSV_OK) { printf("%s: ERR=%d\n",fileName.c_str(),rv); delete tsv; return; } tsv->headersBegin(); while (tsv->headersNext(key,val)==affx::TSV_OK) { // tack on the "#%" so it looks the same as it does in the file. printf("#%%%s=%s\n",key.c_str(),val.c_str()); } tsv->close(); delete tsv; } ////////// int tsv_util_print_duplicate_headerkeys(const std::string& fileName) { int rv; affx::TsvFile* tsv; tsv=new affx::TsvFile(); rv=tsv->open(fileName); if (rv!=affx::TSV_OK) { printf("%s: ERR=%d\n",fileName.c_str(),rv); delete tsv; return 1; } int cnt=tsv->printDuplicateHeaders(); tsv->close(); delete tsv; return (cnt==0)?0:1; } ////////// /// @brief Copy contents from one tsvfile to another. /// @param f_fileName filename of the "from/source" tsvfile /// @param t_fileName filename of the "to/destination" tsvfile /// @param fmt void tsv_util_copy(const std::string& f_fileName,const std::string& t_fileName,int fmt) { affx::TsvFile f_tsv; affx::TsvFile t_tsv; // f_tsv.open(f_fileName); // t_tsv.copyFormat(f_tsv); // better name? t_tsv.addHeadersFrom(f_tsv,affx::TSV_ADD_ALL); // if (fmt==1) { t_tsv.writeCsv(t_fileName); } else if (fmt==2) { t_tsv.writeTsv(t_fileName); } else { assert(0); } while (f_tsv.nextLine()==affx::TSV_OK) { t_tsv.copyLevel(f_tsv,f_tsv.lineLevel()); t_tsv.writeLevel(f_tsv.lineLevel()); } t_tsv.close(); f_tsv.close(); } ////////// /// The limit of filehandles we can paste at one time. // #define TSV_PASTE_FH_LIMIT 900 // for testing #define TSV_PASTE_FH_LIMIT 900 /// @brief Paste the columns of several tsvFiles into one tsvfile in one pass. /// Cant to more than the limit in one pass. /// @param out_fileName filename of the output tsvfile. /// @param in_tsv_fileNames vector of input tsvfile names. 
/// @param in_header_fileNames vector of names to put in header. /// (This is needed for the final multi-pass case.) /// @param seg_max_lines If non-zero, split the output file into segments. (0=>no segments.) /// @param key_col_name Key column name to match while pasting. (""=> no key column.) /// @param verbose The verbosity level void tsv_util_paste_basic(const std::string& out_fileName, const std::vector in_tsv_fileNames, const std::vector in_header_fileNames, int seg_max_lines, const std::string& key_col_name, int verbose) { std::vector in_tsv; std::vector in_tsv_key_cidx; affx::TsvFile out_tsv; int o_cidx; int seg_count=0; int out_tsv_lines=0; // if (in_tsv_fileNames.size()>TSV_PASTE_FH_LIMIT) { // @todo: why doesnt '#TSV_PASTE_FH_LIMIT' work here? Err::errAbort("tsv-paste: Can only paste " +ToStr(TSV_PASTE_FH_LIMIT)+ " tsv files at at time. (unix fh limit.)"); } // if (seg_max_lines<0) { Err::errAbort("tsv-paste: seg_max_lines must be 0 (disabled) a postive number."); } // some output for the user if (verbose>=2) { for (int i=0;i %s\n",out_fileName.c_str()); } // the key_col_name might be numeric. char* key_col_name_ptr2; const char* key_col_name_ptr1=key_col_name.c_str(); int key_col_num=strtol(key_col_name_ptr1,&key_col_name_ptr2,10); // not a full conversion. if (*key_col_name_ptr2!=0) { key_col_num=-1; } // scan the inputs to... for (int i=0;iopen(in_tsv_fileNames[i]); in_tsv.push_back(tsvp); // if (verbose>=3) { printf("tsv_paste: open '%s'\n",in_tsv_fileNames[i].c_str()); } // ...check for key name. if (key_col_name=="") { // no key name in_tsv_key_cidx.push_back(-1); } else { int key_cidx; key_cidx=in_tsv[i]->cname2cidx(0,key_col_name); if (key_col_num>=0) { key_cidx=key_col_num; } else { Err::errAbort("tsv-paste: missing column '"+key_col_name+"' in input '"+in_tsv_fileNames[i]+"'"); } if ((key_cidx<0)||(key_cidx>=in_tsv[i]->getColumnCount(0))) { Err::errAbort("tsv-paste: key column is out of bounds. 
(key_cidx="+ToStr(key_cidx)+")"); } in_tsv_key_cidx.push_back(key_cidx); } } // ...add header info describing the input files used. for (int i=0;igetColumnCount(0);c_idx++) { // only include the first check column, skip the rest. if ((c_idx!=in_tsv_key_cidx[i])||(i==0)) { out_tsv.defineColumn(0,o_cidx++,in_tsv[i]->getColumnName(0,c_idx)); } } } // open the output file in the unsegmented case if (seg_max_lines==0) { out_tsv.writeTsv_v1(out_fileName); } // paste the column while we have rows. while (1) { // open a segmented output file as needed if ((seg_max_lines>0) && // have segments? ((out_tsv_lines>=seg_max_lines) || // hit seg size? ((seg_count==0)&&(out_tsv_lines==0)) // first time through? )) { char buf[300]; sprintf(buf,"%s-%04d",out_fileName.c_str(),seg_count++); out_tsv.writeTsv_v1(buf); out_tsv_lines=0; }; std::string key_col_value; bool more_lines=false; for (int i=0;inextLevel(0)==affx::TSV_OK) { more_lines=true; } } if (!more_lines) { break; } // copy row data to the output tsv. o_cidx=0; for (int i=0;igetColumnCount(0);i_cidx++) { // get the input. std::string tmp; int rv=in_tsv[i]->get(0,i_cidx,tmp); // is this a check column? if (i_cidx==in_tsv_key_cidx[i]) { // must be set if (rv!=affx::TSV_OK) { Err::errAbort("tsv-paste: Check column value is required. '"+in_tsv_fileNames[i]+"'"); } // the first is the reference if (i==0) { out_tsv.set(0,o_cidx++,tmp); key_col_value=tmp; } else { if (tmp!=key_col_value) { Err::errAbort("tsv-paste: Check column mismatch. " "file='"+in_tsv_fileNames[i]+"' " "line="+ToStr(in_tsv[i]->lineNum())+" " "ref='"+key_col_value+"' " "val='"+tmp+"'"); } } } // a normal column else { out_tsv.set(0,o_cidx++,tmp); } } } // out_tsv.writeLevel(0); out_tsv_lines++; } // while // clean up out_tsv.close(); // for (int i=0;iclose(); delete in_tsv[i]; } } /// @brief Paste the columns of the input tsv files into a big tsv file. /// If there are too many inputs, do the work in several passes. 
/// Also splits the output into seg_max_line size files if requested. /// @param out_fileName /// @param in_tsv_fileNames /// @param seg_max_lines /// @param key_col_name /// @param verbose /// @param tsv_paste_fh_limit void tsv_util_paste(const std::string& out_fileName, const std::vector in_tsv_fileNames, int seg_max_lines, const std::string& key_col_name, int verbose, int tsv_paste_fh_limit) { // the simple case, no passes needed... if (in_tsv_fileNames.size()<=tsv_paste_fh_limit) { // ...so just do it. tsv_util_paste_basic(out_fileName, in_tsv_fileNames,in_tsv_fileNames, seg_max_lines,key_col_name,verbose); // and we are done. return; } // complex case, passes are needed. if (verbose>=1) { printf("tsv_util_paste: multiple passes needed to process '%d' inputs.\n", int(in_tsv_fileNames.size())); } // we have to be able to open at least two files at a time. if (tsv_paste_fh_limit<=1) { Err::errAbort("tsv_paste_fh_limit must be bigger than 2."); } // std::vector copy_fileNames=in_tsv_fileNames; // int pass_cnt=0; std::string pass0_out_name; std::string pass1_out_name; std::vector pass_out_names; std::vector pass1_fileNames; // while we have more than the limit, we need to use a temp file. while (copy_fileNames.size()>tsv_paste_fh_limit) { pass1_fileNames.clear(); // tack on the prior tmp file. if (pass0_out_name!="") { pass1_fileNames.push_back(pass0_out_name); } // add some more input files. // dont adjust the length for the file we pushed on. int len=tsv_paste_fh_limit; pass1_fileNames.insert(pass1_fileNames.end(), copy_fileNames.begin(),copy_fileNames.begin()+len); copy_fileNames.erase(copy_fileNames.begin(),copy_fileNames.begin()+len); // gen the name for the new temp file. char buf[100]; SNPRINTF(buf,sizeof(buf),"-%05d.tmp",pass_cnt); pass1_out_name=out_fileName+buf; pass_out_names.push_back(pass1_out_name); // if (verbose>=1) { printf("pass: %d : '%s'...\n",pass_cnt,pass1_out_name.c_str()); } // paste what we have into the new tmp file. 
tsv_util_paste_basic(pass1_out_name, pass1_fileNames, pass1_fileNames, // the filenames in this pass 0, // dont segment. key_col_name,verbose); // get rid of the old tmp file. if (pass0_out_name!="") { //printf("rm %s\n",pass0_out_name.c_str()); Fs::rm(pass0_out_name); } // the current is now the old. pass_cnt++; pass0_out_name=pass1_out_name; } // now do the last pass into the official output file. pass1_fileNames.clear(); if (pass0_out_name!="") { pass1_fileNames.push_back(pass0_out_name); } pass1_fileNames.insert(pass1_fileNames.end(),copy_fileNames.begin(),copy_fileNames.end()); tsv_util_paste_basic(out_fileName, // the final output file pass1_fileNames, in_tsv_fileNames, // the filenames for all the inputs. seg_max_lines, // do segment if needed. key_col_name,verbose); // and get rid of it. Fs::rm(pass0_out_name); } ////////// #define BENCHMARK_COL_CNT 10 void tsv_util_benchmark(int benchmark_cnt,const std::string& filename) { if (benchmark_cnt<=0) { printf("Need to write at least one double."); return; } if (filename=="") { printf("Need to give a file to write to."); return; } int rv; affx::TsvFile* tsv; tsv=new affx::TsvFile(); tsv->addHeader("benchmark-cnt",benchmark_cnt); tsv->addHeader("benchmark-file",filename); for (int col=0;coldefineColumn(0,col,buf); } rv=tsv->writeTsv_v1(filename); int cnt=0; while (cntset(0,c,(cnt++)+0.123456); } tsv->writeLevel(0); } // tsv->close(); delete tsv; } ////////// /// @brief entry point for tsv-util /// @param argc /// @param argv /// @return int main(int argc,const char* argv[]) { int rv=0; PgOptions* opts=new PgOptions(); // opts->setUsage("Utility functions for tsv files." 
"\n" "\n" ); // opts->defineOption("h","help", PgOpt::BOOL_OPT, "This message.", "false"); opts->defineOption("v","verbose",PgOpt::INT_OPT, "verbose level", "0"); opts->defineOption("","headers", PgOpt::BOOL_OPT, "INFILE = Display the headers of the CSV/TSV file.", "false"); opts->defineOption("","linecount", PgOpt::BOOL_OPT, "INFILE = count the lines of data in the file.", "false"); opts->defineOption("","to-csv", PgOpt::BOOL_OPT, "INFILE OUTFILE = Covert the file to CVS format.", "false"); opts->defineOption("","to-tsv", PgOpt::BOOL_OPT, "INFILE OUTFILE = Convert the file to TSV format.", "false"); // opts->defineOption("","paste", PgOpt::STRING_OPT, "OUTFILE FILELIST = Paste the files of filelist into outfile.", ""); opts->defineOption("","seg-lines",PgOpt::INT_OPT, "Segment size", "0"); opts->defineOption("","key-col", PgOpt::STRING_OPT, "COLUMN_NAME = Column name which must be present and equal in all inputs files, if set.", ""); opts->defineOption("","max-paste-fh", PgOpt::INT_OPT, "CNT = Max number of files to paste in one pass.", "900"); // opts->defineOption("","benchmark", PgOpt::INT_OPT, "Number of doubles to write for benchmarking output.", "0"); opts->defineOption("","output", PgOpt::STRING_OPT, "Output file for some operations.", ""); // opts->defineOption("","diff",PgOpt::BOOL_OPT, "Compare headers and data of two tsv formatted files. " "There are a number of options to control printing of the diff.", "false"); // opts->defineOption("","diff-headers",PgOpt::BOOL_OPT, "BOOL = Compare the headers of the files.", "true"); opts->defineOption("","diff-data",PgOpt::BOOL_OPT, "BOOL = Compare the data in the files.", "true"); // opts->defineOption("","diff-print-linenums",PgOpt::BOOL_OPT, "BOOL = Print line numbers from where the lines are from (file1:file2).", "true"); opts->defineOption("","diff-print-same",PgOpt::BOOL_OPT, "BOOL = Print lines which are the same. 
" "(otherwise only changed lines.)", "false"); opts->defineOption("","diff-print-max",PgOpt::INT_OPT, "NUM = Max number of lines to print. (-1=> all)", "-1"); opts->defineOption("","diff-print-format",PgOpt::INT_OPT, "[1 or 2] = Select the format of the displayed diffs, one or two lines.", "1"); opts->defineOption("","diff-max-diff",PgOpt::DOUBLE_OPT, "NUM = Numerical differences smaller than than this are ignored.", "0.00001"); opts->defineOption("","diff-residuals",PgOpt::STRING_OPT, "FILENAME = File to output residuals to. (file1-file2)", ""); opts->defineOption("","print-duplicate-headerkeys",PgOpt::BOOL_OPT, "Print the duplicate headers.", "0"); // opts->parseArgv(argv); // if (opts->getBool("help")||(argc==1)) { opts->usage(); exit(0); } else if (opts->getBool("linecount")) { for (int a=0;agetArgCount();a++) { tsv_util_linecount(opts->getArg(a)); } } else if (opts->getBool("headers")) { for (int a=0;agetArgCount();a++) { tsv_util_headers(opts->getArg(a)); } } // Conversions else if (opts->getBool("to-csv")) { if (opts->getArgCount()<2) { printf("Need two args."); opts->usage(); } tsv_util_copy(opts->getArg(0),opts->getArg(1),1); } else if (opts->getBool("to-tsv")) { if (opts->getArgCount()<2) { printf("Need two args."); opts->usage(); } tsv_util_copy(opts->getArg(0),opts->getArg(1),2); } // else if (opts->get("paste")!="") { tsv_util_paste(opts->get("paste"), opts->getArgVector(), opts->getInt("seg-lines"), opts->get("key-col"), opts->getInt("verbose"), opts->getInt("max-paste-fh")); } // else if (opts->getBool("diff")||opts->getBool("diff-headers")||opts->getBool("diff-data")) { if (opts->getArgCount()!=2) { printf("Need exactly two args."); opts->usage(); } else { // --diff implies both headers and data if (opts->getBool("diff")) { if (!opts->mustFindOpt("diff-headers")->isSet()) { opts->mustFindOpt("diff-headers")->setValue("1"); } if (!opts->mustFindOpt("diff-data")->isSet()) { opts->mustFindOpt("diff-data")->setValue("1"); } } // affx::TsvFileDiff diff; 
// set our options diff.m_opt_do_headers=opts->getBool("diff-headers"); diff.m_opt_do_data=opts->getBool("diff-data"); // diff.m_opt_print_linenums=opts->getBool("diff-print-linenums"); diff.m_opt_print_same=opts->getBool("diff-print-same"); diff.m_opt_print_format=opts->getInt("diff-print-format"); diff.m_opt_print_max=opts->getInt("diff-print-max"); diff.m_opt_max_diff=opts->getDouble("diff-max-diff"); diff.m_residuals_filename=opts->get("diff-residuals"); //diff.m_residuals_filename=opts->getArg(1)+".residuals"; // do the diff diff.diff(opts->getArg(0),opts->getArg(1)); // if (diff.m_diff_cnt!=0) { printf("### %d diff\n",diff.m_diff_cnt); } } } // else if (opts->getBool("print-duplicate-headerkeys")) { int cnt=0; for (int i=0;igetArgCount();i++) { if (tsv_util_print_duplicate_headerkeys(opts->getArg(i))) { cnt++; } } // dont fiddle with the exit value. // rv=(cnt==0)?0:1; } // else if (opts->getInt("benchmark")!=0) { tsv_util_benchmark(opts->getInt("benchmark"),opts->get("output")); } // delete opts; return rv; } affxparser/src/fusion/file/test-fileio.cpp0000644000175200017520000001210614516003651021677 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2004 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // cvs:affy/sdk/file/test-fileio.cpp --- // // $Id: test-fileio.cpp,v 1.14 2009-09-25 17:49:00 mspald Exp $ // // Commentary: // * Run the FileIO.cpp though some tests. // // g++ test-fileio // // NOTE: These are cast into "std::fstream" #include "file/FileIO.h" #include "file/FileWriter.h" // #include "portability/affy-base-types.h" // #include #include #include #include #include #include #include // using namespace std; #define HDR_SIZE 16 #define HDR_W(ostr) WRITE_STRING(ostm,str) #define HDR_R() do { string str; \ ReadFixedString(istm,str,HDR_SIZE); \ printf("%s\n",str.c_str()); \ } while (0) #define TEST_STR "0123456789AB" #define TEST_CNT_INT32 (2*(16/4)) #define TEST_CNT_INT16 (2*(16/2)) #define TEST_CNT_INT8 (2*(16/1)) #define TEST_CNT_FLOAT (2*(16/4)) #define TEST_CNT_STRING 4 int main(int argc,char *argv[]) { int32_t a=1; int32_t i,h,n; float f,g; string str; ofstream ostm; ifstream istm; // if ((a y=%08X %s\n",i,x,y,((x==y)?"":"Error")) HDR_R(); for (i=0;i g=%9.2f %s\n",i,f,g,((f==g)?"":"Error")); f*=2.0; } HDR_R(); for (i=0;i defines a number of types which // are of a known bit size which a programmer can use when // the size of the type is important. Normally in IO. // // * A user can include this file in place of // or they can test for _WIN32 and include one file or the other. // It will work either way. // // #ifdef _WIN32 // #include "affy-base-types.h" // #else // #include // #endif // // * You should not use "long" if you can help it. // The size of that type changes depending on the cpu. // ///////////////////////////////////////////////////////////////// #ifndef _AFFY_BASE_TYPE_H #define _AFFY_BASE_TYPE_H 1 /*! 
\file affy-base-types.h This file defines types for consistency across platforms. */ #include "R_affx_constants.h" /*! Microsoft Visual Studio C++ doesn't implement std::min() and std::max() due to possible conflict with min() and max() defined in windows.h. */ #ifdef _MSC_VER // vc++ version #include #define Max(a,b) max(a,b) #define Min(a,b) min(a,b) #else // assume other people implement the standard #include #define Max(a,b) std::max(a,b) #define Min(a,b) std::min(a,b) #endif // _MSC_VER #ifdef WIN64 #include #endif /* "WIN32" should always be defined by MS C++ */ /* It shouldnt be defined on any other platform */ #ifndef WIN32 /* we are on a unix system -- use the native ... */ /* ...except on a sun which doesnt have it. (but they are defined) */ #ifndef sun #include #endif #else #ifndef __MINGW32__ /* We are on windows -- define the types. */ #ifndef __int8_t_defined #define __int8_t_defined 1 /*! An 8 bit signed integer */ typedef char int8_t; /*! A 16 bit signed integer */ typedef short int int16_t; /*! A 32 bit signed integer */ typedef int int32_t; #ifndef __VC60__ /*! A 64 bit signed integer */ typedef long long int int64_t; #endif /* __VC60__ */ /*! An 8 bit unsigned integer. */ typedef unsigned char uint8_t; /*! A 16 bit unsigned integer. */ typedef unsigned short int uint16_t; /*! A 32 bit unsigned integer. */ typedef unsigned int uint32_t; #ifndef __VC60__ /*! A 64 bit unsigned integer. */ typedef unsigned long long int uint64_t; #endif /* __VC60__ */ #endif /* __int8_t_defined */ #else /*! A 16 bit unsigned integer. */ typedef unsigned short int uint16_t; /*! A 32 bit unsigned integer. */ typedef unsigned int uint32_t; #endif /* __MINGW32__ */ #endif /* WIN32 */ ////////// // I didnt see a standard, so lets pretend that everything // is linux and use their defs // These set up the numeric values to be used. 
#ifndef LITTLE_ENDIAN #define LITTLE_ENDIAN 1234 #endif #ifndef BIG_ENDIAN #define BIG_ENDIAN 4321 #endif // now we set BYTE_ORDER for the platform if it hasnt been set already #ifndef BYTE_ORDER // Big guys first... #ifdef __ppc__ #define BYTE_ORDER BIG_ENDIAN #endif #ifdef __ppc64__ #define BYTE_ORDER BIG_ENDIAN #endif #ifdef __sparc__ #define BYTE_ORDER BIG_ENDIAN #endif // ...then the little ones. #ifdef __i386__ #define BYTE_ORDER LITTLE_ENDIAN #endif #ifdef __x86_64__ #define BYTE_ORDER LITTLE_ENDIAN #endif #ifdef WIN32 #define BYTE_ORDER LITTLE_ENDIAN #endif // BYTE_ORDER #endif // Check that one of the cases above set BYTE_ORDER. // Otherwise some compiled code might be BIG_ENDIAN and some might be LITTLE_ENDIAN, // depending on how the test is written. #ifndef BYTE_ORDER #error "BYTE_ORDER is not set" #endif ////////// /* This is also in "AffymetrixBaseTypes.h" * Be sure to only get it once. (keep that def there in sync.) */ /// @todo remove the duplicate def in "AffymetrixBaseTypes.h" #ifndef _AFFY_TYPE_PUNNED_ #define _AFFY_TYPE_PUNNED_ /* Use a union to allow type-punning without having to * use "-fnostrict-alias" on the entire file. * See the GCC info page about -fstrict-alias for the details. * We used to do "*(uint32_t)&floatvar" but that would break * on some versions of gcc. */ union type_punned { float v_float; int v_int32; unsigned int v_uint32; }; #endif #endif /* affy-base-types.h */ affxparser/src/fusion/portability/affy-system-api.h0000644000175200017520000000342214516003651023562 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License (version 2) as // published by the Free Software Foundation. 
// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program;if not, write to the // // Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////// // // Commentary: // // * On unix, defines a number of api calls // with an operating-system specific calling syntax. // // * A user can include this file in place of // or they can test for _WIN32 and include one file or the other. // It will work either way. // // #ifdef _WIN32 // #include "affy-system-api.h" // #else // #include // #endif // ///////////////////////////////////////////////////////////////// #ifndef _AFFY_SYSTEM_API_H #define _AFFY_SYSTEM_API_H 1 /*! @file affy-system-api.h This file defines operating system API functions for consistency across platforms. */ #ifdef _MSC_VER //#pragma warning(disable: 4996) // ignore deprecated functions warning #include #define snprintf _snprintf #define getpid _getpid #define gethostid() 0 #else #include #endif /* _MSC_VER */ #endif /* affy-system-api.h */ affxparser/src/fusion/portability/apt-no-warnings.h0000644000175200017520000000327614516003651023577 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, Inc. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License (version 2) as // published by the Free Software Foundation. 
// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program;if not, write to the // // Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // affy/sdk/portability/apt-no-warnings.h --- // // $Id$ // /// @file include this file to disable some of the warnings /// we have in APT and havent gotten around to fixing yet. // @todo Including this file should be regarded as a bug, // but better than missing a warning hidden in the rest. #ifndef _APT_NO_WARNINGS_H_ #define _APT_NO_WARNINGS_H_ #if defined(_MSC_VER) // to turn these warnings back on, use // #pragma warning( LVL : NUMBER ) // where "LVL" is the level at which it should be reported. // turn off warnings about signed and unsigned comparisons. #pragma warning( disable: 4018 ) // dont complain about "unsafe" functions. // for this to work, this must be defined BEFORE including the standard libaries. // #define _CRT_SECURE_NO_WARNINGS 1 #pragma warning( disable: 4996 ) //#pragma warning( 1 : 4996 ) #endif #endif // _APT_NO_WARNINGS_H_ affxparser/src/fusion/portability/apt-win-dll.h0000644000175200017520000000267414516003651022704 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License (version 2) as // published by the Free Software Foundation. 
// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program;if not, write to the // // Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /// @file This is used to put in the decls for windows DLLs /// It expands to nothing when APT_LIB_EXPORTS is not set. #ifndef _PORTABILITY_APT_WIN_DLL_H_ #define _PORTABILITY_APT_WIN_DLL_H_ #ifdef WIN32 #ifdef APT_LIB_EXPORTS // when building DLL, target project defines this macro #define APTLIB_API __declspec(dllexport) #elif defined(APT_LIB_IMPORTS) // when using DLL, client project defines this macro #define APTLIB_API __declspec(dllimport) #else // when building or using target static library, or whatever: define it as nothing #define APTLIB_API #endif // APT_EXPORT #else #define APTLIB_API #endif // WIN32 #endif affxparser/src/fusion/util/0000755000175200017520000000000014516022540017003 5ustar00biocbuildbiocbuildaffxparser/src/fusion/util/AffxArray.h0000644000175200017520000002343114516003651021044 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxArray_h_ #define _AffxArray_h_ /** * @file AffxArray.h * * @brief This header contains the AffxArray class definition. */ #include "util/AffxByteArray.h" #include "util/AffxString.h" // #include #include #include // /** * @brief A template class derived from std::vector. * */ template class AffxArray : public std::vector { public: static int compareAsInt(const AffxString& strThis, const AffxString& strThat) { AffxByteArray baThis(strThis); AffxByteArray baThat(strThat); int iThis = 0; int iThat = 0; try{iThis = baThis.parseInt();} catch(...) {iThis = 0;} try{iThat = baThat.parseInt();} catch(...) {iThat = 0;} return compare(iThis, iThat); } static int compareAsIntDescending(const AffxString& strThis, const AffxString& strThat) { AffxByteArray baThis(strThis); AffxByteArray baThat(strThat); int iThis = 0; int iThat = 0; try{iThis = baThis.parseInt();} catch(...) {iThis = 0;} try{iThat = baThat.parseInt();} catch(...) {iThat = 0;} return compareDescending(iThis, iThat); } static int compareCase(const AffxString& iThis, const AffxString& iThat) { return iThis.compareTo(iThat, 0); // int iCompareResult = 0; // if (iThis > iThat) {iCompareResult = 1;} // else if (iThis < iThat) {iCompareResult = -1;} // return iCompareResult; } static int compareNoCase(const AffxString& iThis, const AffxString& iThat) { return iThis.CompareNoCase(iThat); // int iCompareResult = 0; // if (iThis > iThat) {iCompareResult = 1;} // else if (iThis < iThat) {iCompareResult = -1;} // return iCompareResult; } // false then true. 
static int compare(bool bThis, bool bThat) { int iCompareResult = 0; if ((bThis) && (!bThat)) {iCompareResult = 1;} else if ((!bThis) && (bThat)) {iCompareResult = -1;} return iCompareResult; } static int compare(int iThis, int iThat) { int iCompareResult = 0; if (iThis > iThat) {iCompareResult = 1;} else if (iThis < iThat) {iCompareResult = -1;} return iCompareResult; } static int compare(unsigned int iThis, unsigned int iThat) { int iCompareResult = 0; if (iThis > iThat) {iCompareResult = 1;} else if (iThis < iThat) {iCompareResult = -1;} return iCompareResult; } static int compare(double dThis, double dThat) { int iCompareResult = 0; if (dThis > dThat) {iCompareResult = 1;} else if (dThis < dThat) {iCompareResult = -1;} return iCompareResult; } static int compareCaseDescending(const AffxString& iThis, const AffxString& iThat) { int iCompareResult = 0; if (iThis > iThat) {iCompareResult = -1;} else if (iThis < iThat) {iCompareResult = 1;} return iCompareResult; } // true then false. static int compareDescending(bool bThis, bool bThat) { int iCompareResult = 0; if ((!bThis) && (bThat)) {iCompareResult = 1;} else if ((bThis) && (!bThat)) {iCompareResult = -1;} return iCompareResult; } static int compareDescending(int iThis, int iThat) { int iCompareResult = 0; if (iThis < iThat) {iCompareResult = 1;} else if (iThis > iThat) {iCompareResult = -1;} return iCompareResult; } static int compareDescending(double dThis, double dThat) { int iCompareResult = 0; if (dThis < dThat) {iCompareResult = 1;} else if (dThis > dThat) {iCompareResult = -1;} return iCompareResult; } public: AffxArray(int iSize, int iCapacity) {reserve(iCapacity); resize(iSize);} AffxArray() {reserve(1000);} virtual ~AffxArray() {} void reserve(int iSize) {return std::vector::reserve(iSize);} typename std::vector::size_type size() const {return std::vector::size();} void resize(int iNewSize) {return std::vector::resize(iNewSize);} TYPE*& at(int iIndex) {return std::vector::at(iIndex);} void nullAll() { 
for (int iIndex = 0; (iIndex < getCount()); iIndex++) { at(iIndex) = NULL; } std::vector::clear(); } void deleteAll() { for (int iIndex = 0; (iIndex < getCount()); iIndex++) { if (at(iIndex) != NULL) {delete at(iIndex);} at(iIndex) = NULL; } std::vector::clear(); } int getCount() {return (int)size();} void setAt(int iIndex, TYPE* obj) {if ((iIndex >= 0) && (iIndex < getCount())) {at(iIndex) = obj;}} TYPE* getAt(int iIndex) { TYPE* obj = NULL; if ((iIndex >= 0) && (iIndex < getCount())) { obj = at(iIndex); } return obj; } void add(TYPE* obj) { push_back(obj); } int binarySearch(TYPE& obj, int iCompareCode) { int iSearchIndex = -1; int iLow = 0; int iHigh = getCount() - 1; int iMid = 0; int iCompareResult = 0; while (iLow <= iHigh) { iMid = (iLow + iHigh) / 2; iCompareResult = obj.compareTo(*at(iMid), iCompareCode); if (iCompareResult < 0) { iHigh = iMid - 1; } else if (iCompareResult > 0) { iLow = iMid + 1; } else { iSearchIndex = iMid; break; } } return iSearchIndex; } void quickSort() {/*shuffle();*/ quickSort(0, (getCount() - 1), 0);} void quickSort(int iCompareCode) {/*shuffle();*/ quickSort(0, (getCount() - 1), iCompareCode);} void quickSort(int iFrom, int iTo, int iCompareCode) { int jTo = iTo + 1; // If a new value of iCompareCode for elements of type TYPE is required, add the corresponding new // case to the switch statement below and add the specialized template functor implementing the new // comparison to class TYPE. See CNProbeSet.h for an example. // // The "::template" qualifiers below are needed to tell the compiler not to interpret the left angle // bracket as "less than". The "typename" then similarly indicates that ComparePred() is not // a template function but a template class. 
// switch (iCompareCode) { case 0: std::sort(this->begin() + iFrom, this->begin() + jTo, typename TYPE::template ComparePred<0>()); break; case 1: std::sort(this->begin() + iFrom, this->begin() + jTo, typename TYPE::template ComparePred<1>()); break; case 2: std::sort(this->begin() + iFrom, this->begin() + jTo, typename TYPE::template ComparePred<2>()); break; case 3: std::sort(this->begin() + iFrom, this->begin() + jTo, typename TYPE::template ComparePred<3>()); break; default: Err::errAbort("Unknown compare code found in AffxArray"); } } void swap(int iIndex1, int iIndex2) { TYPE* temp = at(iIndex1); at(iIndex1) = at(iIndex2); at(iIndex2) = temp; } void shuffle() { int iNewIndex = 0; TYPE* pobj = NULL; for (int iIndex = (getCount() - 1); (iIndex > 0); iIndex--) { iNewIndex = (rand() % getCount()); if ((iNewIndex >= 0) && (iNewIndex < getCount())) { pobj = at(iNewIndex); at(iNewIndex) = at(iIndex); at(iIndex) = pobj; } } } void qsort() {qsort(0, (getCount() - 1), 0);} void qsort(int iCompareCode) {qsort(0, (getCount() - 1), iCompareCode);} void qsort(int iFrom, int iTo, int iCompareCode) { if (getCount() >= 2) { int i = iFrom; int j = iTo; TYPE* objCenter = getAt((iFrom + iTo) / 2); do { while ((i < iTo) && (objCenter->compareTo(*getAt(i), iCompareCode) > 0)) i++; while ((j > iFrom) && (objCenter->compareTo(*getAt(j), iCompareCode) < 0)) j--; if (i < j) {TYPE* temp = getAt(i); setAt(i, getAt(j)); setAt(j, temp);} if (i <= j) { i++; j--; } } while (i <= j); if (iFrom < j) {qsort(iFrom, j, iCompareCode);} if (i < iTo) {qsort(i, iTo, iCompareCode);} } } void removeAt(int iIndex) { erase(std::vector::begin() + iIndex, std::vector::begin() + iIndex + 1); } void deleteAt(int iIndex) { delete at(iIndex); erase(std::vector::begin() + iIndex, std::vector::begin() + iIndex + 1); } }; #endif // _AffxArray_h_ affxparser/src/fusion/util/AffxBinaryFile.cpp0000644000175200017520000000405114516003651022342 
0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file AffxBinaryFile.cpp * * @brief This file contains the AffxBinaryFile class members. */ #include "util/AffxBinaryFile.h" AffxBinaryFile::AffxBinaryFile() { m_pistrm = NULL; m_postrm = NULL; } AffxBinaryFile::~AffxBinaryFile() { close(); } bool AffxBinaryFile::open(const AffxString& strFileName, m_enumOpenOptions enumOpenOption) { std::fstream fstrm; switch (enumOpenOption) { case LOAD: fstrm.open(strFileName.c_str(), std::fstream::in | std::fstream::binary); if (fstrm.is_open()) {fstrm.close();} else {return false;} m_pistrm = new std::ifstream(strFileName.c_str(), std::ios::in | std::ios::binary); if (m_pistrm == NULL) {return false;} break; case SAVE: m_postrm = new std::ofstream(strFileName.c_str(), std::ios::out | std::ios::binary); if (m_postrm == NULL) {return false;} break; case APPEND: m_postrm = new std::ofstream(strFileName.c_str(), std::ios::out | std::ios::binary | std::ios::app); if (m_postrm == NULL) {return false;} break; default: return false; } return true; } void AffxBinaryFile::close() { if (m_pistrm != NULL) {m_pistrm->close(); delete m_pistrm; m_pistrm = NULL;} if (m_postrm != 
NULL) {m_postrm->close(); delete m_postrm; m_postrm = NULL;} } affxparser/src/fusion/util/AffxBinaryFile.h0000644000175200017520000003537514516003651022024 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxBinaryFile_h_ #define _AffxBinaryFile_h_ /** * @file AffxBinaryFile.h * * @brief This header contains the AffxBinaryFile class definition. */ // #include "util/AffxString.h" // #include #include /** * @brief A class for managing binary files. * */ class AffxBinaryFile { public: enum m_enumOpenOptions{LOAD = 0, SAVE, APPEND}; private: std::ifstream* m_pistrm; std::ofstream* m_postrm; public: AffxBinaryFile(); virtual ~AffxBinaryFile(); bool open(const AffxString& strFileName, m_enumOpenOptions enumOpenOption); void close(); bool isLittleEndian() { #ifndef BYTE_ORDER return true; #else return (BYTE_ORDER == LITTLE_ENDIAN); #endif } unsigned int getOffset() {return (unsigned int)m_pistrm->tellg();} void setOffset(unsigned int ui) {m_pistrm->seekg(ui, std::ios::beg);} void advanceOffset(unsigned int ui) {setOffset(getOffset() + ui);} // All Affx binaries should be in Big Endian except for binary CDF file. 
// This code is written from a Little Endian point of view. For x-platform use the Big Endian / Little Endian functions. double readDouble() {double d = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&d, sizeof(double));} return d;} float readFloat() {float f = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&f, sizeof(float));} return f;} int readInt() {int i = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&i, sizeof(int));} return i;} unsigned int readUnsignedInt() {unsigned int ui = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&ui, sizeof(unsigned int));}return ui;} // __int64 readLong64() {__int64 l = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&l, sizeof(__int64));} return l;} short readShort() {short n = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&n, sizeof(short));}return n;} unsigned short readUnsignedShort() {unsigned short un = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&un, sizeof(unsigned short));}return un;} char readChar() {char c = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&c, sizeof(char));}return c;} unsigned char readUnsignedChar() {unsigned char uc = 0; if (m_pistrm != NULL) {m_pistrm->read((char*)&uc, sizeof(unsigned char));}return uc;} AffxString readString() { AffxString str; int iStringLength = readInt(); if (iStringLength > 0) { char* pszValue = new char[iStringLength + 1]; m_pistrm->read(pszValue, iStringLength); pszValue[iStringLength] = 0; str = pszValue; delete[] pszValue; } return str; } AffxString readString(char* pszValue, int iStringLength) { AffxString str; if (iStringLength > 0) { m_pistrm->read(pszValue, iStringLength); pszValue[iStringLength] = 0; str = pszValue; } return str; } AffxString readString(int iStringLength) { AffxString str; if (iStringLength > 0) { char* pszValue = new char[iStringLength + 1]; m_pistrm->read(pszValue, iStringLength); pszValue[iStringLength] = 0; str = pszValue; delete[] pszValue; } return str; } AffxString readWString(wchar_t* pszValue, int iStringLength) { AffxString str; 
if (iStringLength > 0) { for (int iIndex = 0; (iIndex < iStringLength); iIndex++) { unsigned short un = readUnsignedShort(); *(pszValue + iIndex) = (wchar_t)un; str += (char)(un >> 8); } pszValue[iStringLength] = 0; delete[] pszValue; } return str; } AffxString readBigEndianHalfWString() { AffxString str; int iStringLength = readBigEndianInt(); iStringLength = iStringLength / 2; if (iStringLength > 0) { wchar_t* pszValue = new wchar_t[iStringLength + 1]; for (int iIndex = 0; (iIndex < iStringLength); iIndex++) { unsigned short un = readUnsignedShort(); *(pszValue + iIndex) = (wchar_t)un; str += (char)(un >> 8); } pszValue[iStringLength] = 0; delete[] pszValue; } return str; } AffxString readHalfWString(int iStringLength) { AffxString str; iStringLength = iStringLength / 2; if (iStringLength > 0) { wchar_t* pszValue = new wchar_t[iStringLength + 1]; for (int iIndex = 0; (iIndex < iStringLength); iIndex++) { unsigned short un = readUnsignedShort(); *(pszValue + iIndex) = (wchar_t)un; str += (char)(un >> 8); } pszValue[iStringLength] = 0; delete[] pszValue; } return str; } short readBigEndianShort() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); if (isLittleEndian()) {i = ((ch1 << 8) + (ch2 << 0));} else {i = ((ch1 << 0) + (ch2 << 8));} return (short)i; } unsigned short readBigEndianUnsignedShort() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); if (isLittleEndian()) {i = ((ch1 << 8) + (ch2 << 0));} else {i = ((ch2 << 8) + (ch1 << 0));} return (unsigned short)i; } int readBigEndianInt() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); int ch3 = readUnsignedChar(); int ch4 = readUnsignedChar(); if (isLittleEndian()) {i = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));} else {i = ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0));} return i; } unsigned int readBigEndianUnsignedInt() { unsigned int ui = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); int ch3 = 
readUnsignedChar(); int ch4 = readUnsignedChar(); if (isLittleEndian()) {ui = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));} else {ui = ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0));} return ui; } float readBigEndianFloat() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); int ch3 = readUnsignedChar(); int ch4 = readUnsignedChar(); if (isLittleEndian()) {i = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));} else {i = ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0));} void* p = &i; return *(float*)p; // return (float)((float*)&i); } AffxString readBigEndianString() { AffxString str; int iStringLength = readBigEndianInt(); if (iStringLength > 0) { char* pszValue = new char[iStringLength + 1]; m_pistrm->read(pszValue, iStringLength); pszValue[iStringLength] = 0; str = pszValue; delete[] pszValue; } return str; } AffxString readBigEndianString(int iStringLength) { AffxString str; readBigEndianInt(); iStringLength -= 4; if (iStringLength > 0) { char* pszValue = new char[iStringLength + 1]; m_pistrm->read(pszValue, iStringLength); pszValue[iStringLength] = 0; str = pszValue; delete[] pszValue; } return str; } AffxString readBigEndianWString() { int iStringLength = readBigEndianInt(); AffxString str; if (iStringLength > 0) { wchar_t* pszValue = new wchar_t[iStringLength + 1]; for (int iIndex = 0; (iIndex < iStringLength); iIndex++) { unsigned short un = readUnsignedShort(); *(pszValue + iIndex) = (wchar_t)un; str += (char)(un >> 8); } pszValue[iStringLength] = 0; delete[] pszValue; } return str; } short readLittleEndianShort() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); if (!isLittleEndian()) {i = ((ch1 << 8) + (ch2 << 0));} else {i = ((ch1 << 0) + (ch2 << 8));} return (short)i; } unsigned short readLittleEndianUnsignedShort() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); if (!isLittleEndian()) {i = ((ch1 << 8) + (ch2 << 0));} else {i = ((ch2 << 8) + (ch1 << 0));} 
return (unsigned short)i; } int readLittleEndianInt() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); int ch3 = readUnsignedChar(); int ch4 = readUnsignedChar(); if (!isLittleEndian()) {i = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));} else {i = ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0));} return i; } unsigned int readLittleEndianUnsignedInt() { unsigned int ui = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); int ch3 = readUnsignedChar(); int ch4 = readUnsignedChar(); if (!isLittleEndian()) {ui = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));} else {ui = ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0));} return ui; } float readLittleEndianFloat() { int i = 0; int ch1 = readUnsignedChar(); int ch2 = readUnsignedChar(); int ch3 = readUnsignedChar(); int ch4 = readUnsignedChar(); if (!isLittleEndian()) {i = ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));} else {i = ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0));} void* p = &i; return *(float*)p; // return (float)*((float*)&i); } AffxString readLittleEndianString() { AffxString str; int iStringLength = readLittleEndianInt(); if (iStringLength > 0) { char* pszValue = new char[iStringLength + 1]; m_pistrm->read(pszValue, iStringLength); pszValue[iStringLength] = 0; str = pszValue; delete[] pszValue; } return str; } void writeFloat(float f) {if (m_postrm != NULL) {m_postrm->write((char*)&f, sizeof(float));}} void writeInt(int i) {if (m_postrm != NULL) {m_postrm->write((char*)&i, sizeof(int));}} void writeUnsignedInt(unsigned int ui) {if (m_postrm != NULL) {m_postrm->write((char*)&ui, sizeof(unsigned int));}} void writeShort(short n) {if (m_postrm != NULL) {m_postrm->write((char*)&n, sizeof(short));}} void writeUnsignedShort(unsigned short un) {if (m_postrm != NULL) {m_postrm->write((char*)&un, sizeof(unsigned short));}} void writeChar(char c) {if (m_postrm != NULL) {m_postrm->write((char*)&c, sizeof(char));}} void 
writeUnsignedChar(unsigned char uc) {if (m_postrm != NULL) {m_postrm->write((char*)&uc, sizeof(unsigned char));}} void writeString(const AffxString& str) { int iStringLength = (int)str.length(); writeInt(iStringLength); if (iStringLength > 0) { if (m_postrm != NULL) {m_postrm->write(str.c_str(), iStringLength);} } } void writeString(const char* pszBuffer, int iStringLength) { if (m_postrm != NULL) {m_postrm->write(pszBuffer, iStringLength);} } void writeBigEndianShort(short i) { if (isLittleEndian()) { writeChar(((char*)&i)[1]); writeChar(((char*)&i)[0]); } else { writeChar(((char*)&i)[0]); writeChar(((char*)&i)[1]); } } void writeBigEndianUnsignedShort(unsigned short ui) { if (isLittleEndian()) { writeUnsignedChar(((unsigned char*)&ui)[1]); writeUnsignedChar(((unsigned char*)&ui)[0]); } else { writeUnsignedChar(((unsigned char*)&ui)[0]); writeUnsignedChar(((unsigned char*)&ui)[1]); } } void writeBigEndianInt(int i) { if (isLittleEndian()) { writeChar(((char*)&i)[3]); writeChar(((char*)&i)[2]); writeChar(((char*)&i)[1]); writeChar(((char*)&i)[0]); } else { writeChar(((char*)&i)[0]); writeChar(((char*)&i)[1]); writeChar(((char*)&i)[2]); writeChar(((char*)&i)[3]); } } void writeBigEndianUnsignedInt(unsigned int ui) { if (isLittleEndian()) { writeUnsignedChar(((unsigned char*)&ui)[3]); writeUnsignedChar(((unsigned char*)&ui)[2]); writeUnsignedChar(((unsigned char*)&ui)[1]); writeUnsignedChar(((unsigned char*)&ui)[0]); } else { writeUnsignedChar(((unsigned char*)&ui)[0]); writeUnsignedChar(((unsigned char*)&ui)[1]); writeUnsignedChar(((unsigned char*)&ui)[2]); writeUnsignedChar(((unsigned char*)&ui)[3]); } } void writeBigEndianFloat(float f) { if (isLittleEndian()) { writeChar(((char*)&f)[3]); writeChar(((char*)&f)[2]); writeChar(((char*)&f)[1]); writeChar(((char*)&f)[0]); } else { writeChar(((char*)&f)[0]); writeChar(((char*)&f)[1]); writeChar(((char*)&f)[2]); writeChar(((char*)&f)[3]); } } void writeBigEndianString(const AffxString& str) { int iStringLength = 
(int)str.length(); writeBigEndianInt(iStringLength); if (iStringLength > 0) { if (m_postrm != NULL) {m_postrm->write(str.c_str(), iStringLength);} } } void writeLittleEndianShort(short i) { if (!isLittleEndian()) { writeChar(((char*)&i)[1]); writeChar(((char*)&i)[0]); } else { writeChar(((char*)&i)[0]); writeChar(((char*)&i)[1]); } } void writeLittleEndianUnsignedShort(unsigned short ui) { if (!isLittleEndian()) { writeUnsignedChar(((unsigned char*)&ui)[1]); writeUnsignedChar(((unsigned char*)&ui)[0]); } else { writeUnsignedChar(((unsigned char*)&ui)[0]); writeUnsignedChar(((unsigned char*)&ui)[1]); } } void writeLittleEndianInt(int i) { if (!isLittleEndian()) { writeChar(((char*)&i)[3]); writeChar(((char*)&i)[2]); writeChar(((char*)&i)[1]); writeChar(((char*)&i)[0]); } else { writeChar(((char*)&i)[0]); writeChar(((char*)&i)[1]); writeChar(((char*)&i)[2]); writeChar(((char*)&i)[3]); } } void writeLittleEndianUnsignedInt(unsigned int ui) { if (!isLittleEndian()) { writeUnsignedChar(((unsigned char*)&ui)[3]); writeUnsignedChar(((unsigned char*)&ui)[2]); writeUnsignedChar(((unsigned char*)&ui)[1]); writeUnsignedChar(((unsigned char*)&ui)[0]); } else { writeUnsignedChar(((unsigned char*)&ui)[0]); writeUnsignedChar(((unsigned char*)&ui)[1]); writeUnsignedChar(((unsigned char*)&ui)[2]); writeUnsignedChar(((unsigned char*)&ui)[3]); } } void writeLittleEndianFloat(float f) { if (!isLittleEndian()) { writeChar(((char*)&f)[3]); writeChar(((char*)&f)[2]); writeChar(((char*)&f)[1]); writeChar(((char*)&f)[0]); } else { writeChar(((char*)&f)[0]); writeChar(((char*)&f)[1]); writeChar(((char*)&f)[2]); writeChar(((char*)&f)[3]); } } void writeLittleEndianString(const AffxString& str) { int iStringLength = (int)str.length(); writeLittleEndianInt(iStringLength); if (iStringLength > 0) { if (m_postrm != NULL) {m_postrm->write(str.c_str(), iStringLength);} } } }; #endif affxparser/src/fusion/util/AffxByteArray.cpp0000644000175200017520000007270114516003651022227 
0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file AffxByteArray.cpp * * @brief This file contains the AffxByteArray class members. */ // #include "util/AffxByteArray.h" #include "util/Fs.h" // #include #include #include // using namespace std; #define __minimum(a,b)(((a)<(b))?(a):(b)) #define __maximum(a,b)(((a)>(b))?(a):(b)) int AffxByteArray::getAllocLength() const { return m_nMaxSize; } int AffxByteArray::getSize() const { return m_nSize; } int AffxByteArray::getUpperBound() const { return m_nSize-1; } void AffxByteArray::removeAll() { setSize(0); } char AffxByteArray::getAt(int nIndex) const { return m_pData[nIndex]; } void AffxByteArray::setAt(int nIndex, char newElement) { m_pData[nIndex] = newElement; } char& AffxByteArray::elementAt(int nIndex) { return m_pData[nIndex]; } const char* AffxByteArray::getData() const { return (const char*)m_pData; } char* AffxByteArray::getData() { return (char*)m_pData; } int AffxByteArray::add(char newElement) { int nIndex = m_nSize; setAtGrow(nIndex, newElement); return nIndex; } char AffxByteArray::operator[](int nIndex) const { return getAt(nIndex); } char& AffxByteArray::operator[](int nIndex) { 
return elementAt(nIndex); } // Empty the data and free the memory allocation. void AffxByteArray::empty() { setSize(0); // shrink to nothing delete[] (char*)m_pData; m_pData = NULL; m_nSize = m_nMaxSize = 0; m_nSize = 0; } // Clear the data without freeing the memory allocation. void AffxByteArray::clear() { removeAt(0, m_nSize); } AffxByteArray::AffxByteArray() { m_pData = NULL; m_nSize = m_nMaxSize = m_nGrowBy = 0; m_bLocked = false; } AffxByteArray::AffxByteArray(const AffxByteArray& ba) { int iLength = ba.getSize(); m_nSize = m_nMaxSize = iLength; m_nGrowBy = 0; m_bLocked = false; // m_pData = (char*) new char[iLength * sizeof(char)]; m_pData = (char*) new char[iLength]; memcpy(m_pData, ba.m_pData, (iLength * sizeof(char))); } AffxByteArray::AffxByteArray(const AffxString& str) { AffxString str1 = str; int iLength = (int)str1.length(); m_nSize = m_nMaxSize = iLength; m_nGrowBy = 0; m_bLocked = false; m_pData = (char*) new char[iLength * sizeof(char)]; for (unsigned int iIndex = 0; (iIndex < str1.length()); iIndex++) { m_pData[iIndex] = (char)str1.charAt(iIndex); } // memcpy(m_pData, ba.m_pData, (iLength * sizeof(char))); } AffxByteArray::~AffxByteArray() { empty(); } void AffxByteArray::setSize(int nNewSize, int nGrowBy) { // If locked do not reduce the size of the allocation. 
if (m_bLocked) { m_nSize = nNewSize; return; } if (nGrowBy != -1) m_nGrowBy = nGrowBy; // set new size if (nNewSize == 0) { // shrink to nothing // delete[] (char*)m_pData; // m_pData = NULL; // m_nSize = m_nMaxSize = 0; m_nSize = 0; } else if (m_pData == NULL) { // create one with exact size m_pData = (char*) new char[nNewSize * sizeof(char)]; memset(m_pData, 0, nNewSize * sizeof(char)); // zero fill m_nSize = m_nMaxSize = nNewSize; } else if (nNewSize <= m_nMaxSize) { // it fits if (nNewSize > m_nSize) { // initialize the new elements memset(&m_pData[m_nSize], 0, (nNewSize-m_nSize) * sizeof(char)); } m_nSize = nNewSize; } else { // otherwise, grow array int nGrowBy = m_nGrowBy; if (nGrowBy == 0) { // heuristically determine growth when nGrowBy == 0 // (this avoids heap fragmentation in many situations) nGrowBy = __minimum(1024, __maximum(4, m_nSize / 8)); } int nNewMax; if (nNewSize < m_nMaxSize + nGrowBy) nNewMax = m_nMaxSize + nGrowBy; // granularity else nNewMax = nNewSize; // no slush char* pNewData = (char*) new char[nNewMax * sizeof(char)]; // copy new data from old memcpy(pNewData, m_pData, m_nSize * sizeof(char)); // construct remaining elements memset(&pNewData[m_nSize], 0, (nNewSize-m_nSize) * sizeof(char)); // get rid of old stuff (note: no destructors called) delete[] (char*)m_pData; m_pData = pNewData; m_nSize = nNewSize; m_nMaxSize = nNewMax; } } int AffxByteArray::append(const AffxString& srcIn) { AffxString src = srcIn; int nOldSize = m_nSize; setSize(m_nSize + (int)src.length()); memcpy(m_pData + nOldSize, src.c_str(), src.length() * sizeof(char)); return nOldSize; } int AffxByteArray::append(const AffxByteArray& src) { int nOldSize = m_nSize; setSize(m_nSize + src.m_nSize); memcpy(m_pData + nOldSize, src.m_pData, src.m_nSize * sizeof(char)); return nOldSize; } void AffxByteArray::copy(const AffxByteArray& src) { setSize(src.m_nSize); memmove(m_pData, src.m_pData, src.m_nSize * sizeof(char)); } void AffxByteArray::freeExtra() { if (m_bLocked) 
return; if (m_nSize != m_nMaxSize) { // shrink to desired size char* pNewData = NULL; if (m_nSize != 0) { pNewData = (char*) new char[m_nSize * sizeof(char)]; // copy new data from old memcpy(pNewData, m_pData, m_nSize * sizeof(char)); } // get rid of old stuff (note: no destructors called) delete[] (char*)m_pData; m_pData = pNewData; m_nMaxSize = m_nSize; } } ///////////////////////////////////////////////////////////////////////////// void AffxByteArray::setAtGrow(int nIndex, char newElement) { if (nIndex >= m_nSize) setSize(nIndex+1); m_pData[nIndex] = newElement; } void AffxByteArray::insertAt(int nIndex, char newElement, int nCount) { if (nIndex >= m_nSize) { // adding after the end of the array setSize(nIndex + nCount); // grow so nIndex is valid } else { // inserting in the middle of the array int nOldSize = m_nSize; setSize(m_nSize + nCount); // grow it to new size // shift old data up to fill gap memmove(&m_pData[nIndex+nCount], &m_pData[nIndex], (nOldSize-nIndex) * sizeof(char)); // re-init slots we copied from memset(&m_pData[nIndex], 0, nCount * sizeof(char)); } // insert new m_pData in the gap while (nCount--) m_pData[nIndex++] = newElement; } void AffxByteArray::removeAt(int nIndex, int nCount) { // just remove a range int nMoveCount = m_nSize - (nIndex + nCount); if (nMoveCount) memcpy(&m_pData[nIndex], &m_pData[nIndex + nCount], nMoveCount * sizeof(char)); m_nSize -= nCount; } void AffxByteArray::insertAt(int nStartIndex, AffxByteArray* pNewArray) { if (pNewArray->getSize() > 0) { insertAt(nStartIndex, pNewArray->getAt(0), pNewArray->getSize()); for (int i = 0; i < pNewArray->getSize(); i++) setAt(nStartIndex + i, pNewArray->getAt(i)); } } // Assignment operator. 
const AffxByteArray& AffxByteArray::operator=(const AffxByteArray& ba) { if (this == &ba) return *this; empty(); setSize(ba.getSize()); copy(0, ba, 0, ba.getSize()); return *this; } bool AffxByteArray::operator==(const AffxByteArray &ba) { bool bEqual = false; int iThisSize = getSize(); int iThatSize = ba.getSize(); if (iThisSize == iThatSize) { for (int i = 0; (i < iThisSize); i++) { if (getAt(i) == ba.getAt(i)) bEqual = true; else { bEqual = false; break; } } } return bEqual; } bool AffxByteArray::equals(const AffxString &str) const { bool bEqual = false; int iThisSize = getSize(); int iThatSize = (int)str.length(); if (iThisSize == iThatSize) { if ((iThisSize == 0) && (iThatSize == 0)) {bEqual = true;} else { for (int i = 0; (i < iThisSize); i++) { if (getAt(i) == str.getAt(i)) bEqual = true; else { bEqual = false; break; } } } } return bEqual; } bool AffxByteArray::equalsIgnoreCase(const AffxString &str) { bool bEqual = false; int iThisSize = getSize(); int iThatSize = (int)str.length(); if (iThisSize == iThatSize) { for (int i = 0; (i < iThisSize); i++) { if (getAt(i) >= 'A' && getAt(i) <= 'Z') { if ((getAt(i) == str.getAt(i)) || (getAt(i) + ' ' == str.getAt(i))) bEqual = true; else { bEqual = false; break; } } else if (getAt(i) >= 'a' && getAt(i) <= 'z') { if ((getAt(i) == str.getAt(i)) || (getAt(i) - ' ' == str.getAt(i))) bEqual = true; else { bEqual = false; break; } } else if (getAt(i) == str.getAt(i)) bEqual = true; else { bEqual = false; break; } } } return bEqual; } int AffxByteArray::getInt(int &iIndex) { char buffer[64]; short i = 0; int iSize = getSize(); while ((iIndex < iSize) && (i < 63) && (isdigit(getAt(iIndex)))) { buffer[i] = (char)(getAt(iIndex)); iIndex++; i++; } buffer[i] = '\0'; return atol(buffer); } void AffxByteArray::copy(int iDestStart, const AffxByteArray& src, int iSrcStart, int iLength) { if (getSize() < (iDestStart + iLength)) setSize(iDestStart + iLength); memmove((m_pData + iDestStart), (src.m_pData + iSrcStart), (iLength * 
sizeof(char))); } // Read one line from the byte array into a StringBuffer object. void AffxByteArray::setOffset(int iOffset) {m_iOffset = iOffset;} bool AffxByteArray::getLine(AffxByteArray& ba) { bool bSuccessful = false; int iLength = 0; ba.setSize(255); int iStart = m_iOffset; int iIndex = m_iOffset; if (getSize() > 0) { int i = 0; int iLast = 0; while ((iIndex < getSize()) && ((i = (int)getAt(iIndex)) != 10)) { iIndex++; iLast = i; } iLength = iIndex - iStart; m_iOffset = m_iOffset + iLength + 1; if ((iIndex >= getSize()) && (iLength == 0)) { ba.setSize(0); } else { bSuccessful = true; if ((i == 10) && (iLast == 13)) { if (iLength > 0) iLength--; } ba.setSize(iLength); ba.copy(0, *this, iStart, iLength); } } return bSuccessful; } // Count the number of columns separeted by tab characters. int AffxByteArray::getColumnCount() { int iColumnCount = 1; for (int i = 0; (i < getSize()); i++) { if (getAt(i) == 9) { iColumnCount++; } } return iColumnCount; } // Get the column m_pData. AffxByteArray& AffxByteArray::getColumn(int iColumnNumber, AffxByteArray& ba) { if (iColumnNumber == 0) { ba.setSize(0); return ba; } int iColumnCount = 1; int iStart = 0; int iLength = 0; for (int i = 0; (i < getSize()); i++) { if (getAt(i) == 9) { iColumnCount++; if (iColumnCount == iColumnNumber) { iStart = i + 1; continue; } } if (iColumnCount > iColumnNumber) break; else if (iColumnCount == iColumnNumber) iLength++; } if ((iStart >= getSize()) || (iLength == 0)) ba.setSize(0); else { ba.setSize(iLength); ba.copy(0, *this, iStart, iLength); } return ba; } char AffxByteArray::parseChar() const { if (getLength() > 0) {return getAt(0);} else return 0; } // Convert the byte array into an int value. 
int AffxByteArray::parseInt() const { char *endPtr = NULL; return (int) strtol(this->toString().c_str(), &endPtr, 0); } bool AffxByteArray::parsebool() const { bool b = false; AffxByteArray ba(*this); ba.toLowerCase(); ba.trimTabs(); if ((ba.equals("1")) || (ba.equals("y")) || (ba.equals("yes")) || (ba.equals("true")) || (ba.equals("t"))) {b = true;} else if ((ba.equals("")) || (ba.equals("0")) || (ba.equals("n")) || (ba.equals("no")) || (ba.equals("false")) || (ba.equals("f"))) {b = false;} return b; } AffxByteArray& AffxByteArray::trimTabs() { int i = 0; int iStart = 0; int iEnd = getSize(); for (i = 0; (i < getSize()); i++) { if (getAt(i) <= ' ') {iStart++;} else {break;} } for (i = (getSize() - 1); (i >= 0); i--) { if (getAt(i) <= ' ') {iEnd--;} else {break;} } int iLength = iEnd - iStart; if (iLength <= 0) { setSize(0); } else { copy(0, *this, iStart, iLength); setSize(iLength); } return *this; } AffxByteArray& AffxByteArray::trim() { int i = 0; int iStart = 0; int iEnd = getSize(); int iLength = 0; for (i = 0; (i < getSize()); i++) { if ((getAt(i) <= 32) && (getAt(i) != 9)) iStart++; else break; } for (i = (getSize() - 1); (i >= 0); i--) { if ((getAt(i) <= 32) && (getAt(i) != 9)) iEnd--; else break; } iLength = iEnd - iStart; if (iLength <= 0) { setSize(0); } else { copy(0, *this, iStart, iLength); setSize(iLength); } return *this; } AffxString AffxByteArray::toString() const { AffxString str; if (getSize() > 0) { char *psz = (char *)malloc(getSize() + 1); #if defined WIN32 && !defined WIN64 strncpy_s(psz, getSize() + 1, (char *)getData(), getSize()); #else strncpy(psz, (char *)getData(), getSize()); #endif psz[getSize()] = 0; str = psz; free(psz); } return str; } bool AffxByteArray::startsWith(const AffxString& strCompare) { bool bStartsWith = false; int iCompareLength = strCompare.getLength(); if (getSize() >= iCompareLength) { bStartsWith = true; for (int iIndex = 0; (iIndex < iCompareLength); iIndex++) { if (getAt(iIndex) != strCompare.getAt(iIndex)) { 
bStartsWith = false; break; } } } return bStartsWith; } AffxString AffxByteArray::substring(int iIndex, int iEndIndex) { AffxByteArray ba; if (iEndIndex == -1) iEndIndex = getSize(); int iLength = iEndIndex - iIndex; if (iLength < 0) iLength = 0; ba.setSize(iLength); ba.copy(0, *this, iIndex, iLength); return ba.toString(); } AffxByteArray& AffxByteArray::toLowerCase() { int iCount = getSize(); char by = 0; int iOffset = 'a' - 'A'; for (int iIndex = 0; (iIndex < iCount); iIndex++) { by = getAt(iIndex); if ((by >= 'A') && (by <= 'Z')) { by += iOffset; setAt(iIndex, by); } } return *this; } AffxByteArray& AffxByteArray::toUpperCase() { int iCount = getSize(); char by = 0; int iOffset = 'a' - 'A'; for (int iIndex = 0; (iIndex < iCount); iIndex++) { by = getAt(iIndex); if ((by >= 'a') && (by <= 'z')) { by -= iOffset; setAt(iIndex, by); } } return *this; } void AffxByteArray::trimInternal() { int iCount = getSize(); char by = 0; for (int iIndex = 0; (iIndex < iCount); iIndex++) { by = getAt(iIndex); if (by <= 32) { copy(iIndex, *this, (iIndex + 1), (iCount - (iIndex + 1))); iCount--; } } setSize(iCount); } AffxString AffxByteArray::firstWord(int iOffset) { int iIndex = 0; AffxByteArray ba; iIndex = iOffset; int iStartIndex = 0; int iNewSize = 0; int iCount = getSize(); if (iOffset < iCount) { while ((iIndex < iCount) && (getAt(iIndex) <= 32)) iIndex++; iStartIndex = iIndex; while ((iIndex < iCount) && (getAt(iIndex) > 32)) { iIndex++; iNewSize++; } ba.setSize(iNewSize); ba.copy(0, *this, iStartIndex, iNewSize); } return ba.toString(); } AffxByteArray AffxByteArray::reverseComplement() { AffxByteArray ba = *this; AffxString strComplement = "tvghefcdijmlknopqysaabwxrz"; char by = 0; int iCount = getSize(); int iIndex = 0; ba.toLowerCase(); if (((iCount / 2) * 2) != iCount) { iIndex = (iCount / 2); ba.setAt(iIndex, strComplement.getAt(ba.getAt(iIndex) - 'a')); } for (int i = 0; (i < (iCount / 2)); i++) { iIndex = (iCount - i - 1); by = ba.getAt(i); ba.setAt(i, 
strComplement.getAt(ba.getAt(iIndex) - 'a')); ba.setAt(iIndex, strComplement.getAt(by - 'a')); } return ba; } double AffxByteArray::parseDouble() const { char *endPtr = NULL; double d = strtod(toString().c_str(), &endPtr); if ((equals("1.#IND")) || (equals("-1.#IND")) || (equals("1.#QNAN")) || (equals("nan"))) {d = numeric_limits::quiet_NaN();} if ((equals("1.#INF")) || (equals("Inf")) || (equals("inf"))) {d = numeric_limits::infinity();} if ((equals("-1.#INF")) || (equals("-Inf")) || (equals("-inf"))) {d = -numeric_limits::infinity();} return d; } AffxByteArray& AffxByteArray::append(char bytes[], int offset, int len) { // try // { int oldcount = m_nSize; int newcount = m_nSize + len; if (newcount > m_nMaxSize) { setSize(newcount); } memcpy(((char*)m_pData + oldcount), ((char *)bytes + offset), len); // System.arraycopy(bytes, offset, value, count, len); m_nSize = newcount; // } // catch (Throwable e) {AffxLog.throwable(e);} return *this; } bool AffxByteArray::nextLine(AffxByteArray& ba) { bool bSuccessful = false; int iLength = 0; ba.setSize(255); int iStart = m_iOffset; int iIndex = m_iOffset; if (getSize() > 0) { int i = 0; int iLast = 0; while ((iIndex < getSize()) && ((i = (int)m_pData[iIndex]) != '\n') && ((i = (int)m_pData[iIndex]) != '\r')) { iIndex++; iLast = i; } if ((i == '\r') && ((iIndex + 1) < getSize()) && ((int)m_pData[iIndex + 1] == '\n')) { iIndex++; iLast = i; i = '\n'; } iLength = iIndex - iStart; m_iOffset += (iLength + 1); bool bRemoveLastByte = ((i == '\n') && (iLast == '\r')); bSuccessful = getLine(iStart, iIndex, iLength, ba, bRemoveLastByte); ba.m_iOffset = 0; } return bSuccessful; } bool AffxByteArray::isValid(bool bParameterFile) { bool bValid = false; trim(); if (getLength() > 0) { if (!bParameterFile) {bValid = true;} else if (getAt(0) != ';') {bValid = true;} // ';' } return bValid; } bool AffxByteArray::getLine(int iStart, int iIndex, int iLength, AffxByteArray& ba, bool bRemoveLastByte) { bool bSuccessful = false; if ((iIndex >= 
getSize()) && (iLength == 0)) { ba.setSize(0); } else { bSuccessful = true; if (bRemoveLastByte) { if (iLength > 0) { iLength--; } } ba.setSize(iLength); memcpy((void*)(ba.m_pData), (void*)(m_pData + iStart), iLength); //System.arraycopy(value, iStart, ba.value, 0, iLength); } return bSuccessful; } AffxString AffxByteArray::getWord(int iWordIndex) { int iIndex = 0; int iOffset = 0; int iWordFound = 0; AffxByteArray ba; iIndex = iOffset; while ((iIndex < length()) && (iWordFound < iWordIndex)) { while ((iIndex < length()) && (getAt(iIndex) <= 32)) iIndex++; ba.setSize(0); if (iIndex < length()) iWordFound++; while ((iIndex < length()) && (getAt(iIndex) > 32)) { ba.append(getAt(iIndex)); iIndex++; } } if (iWordFound < iWordIndex) ba.setSize(0); return ba.toString(); } int AffxByteArray::append(char by) { int nOldSize = m_nSize; setSize(m_nSize + 1); memcpy(m_pData + nOldSize, &by, 1 * sizeof(char)); return nOldSize; } int AffxByteArray::parameterCount() { int iIndex = 0; int iOffset = 0; int iWordFound = 0; bool bQuotedWord = false; iIndex = iOffset; while ((iIndex < getLength())) { while ((iIndex < getLength()) && (getAt(iIndex) <= ' ')) { iIndex++; } if ((iIndex < getLength()) && (!bQuotedWord)) { if (getAt(iIndex) == '"') {bQuotedWord = true; iIndex++;} if (iIndex < getLength()) { iWordFound++; } } while ((iIndex < getLength()) && (getAt(iIndex) > ' ')) { iIndex++; } if (((iIndex - 1) < getLength()) && (getAt(iIndex - 1) == '"')) {bQuotedWord = false;} } return iWordFound; } int AffxByteArray::csvParameterCount() { int iIndex = 0; int iOffset = 0; int iWordFound = 0; bool bQuotedWord = false; iIndex = iOffset; while ((iIndex < getLength())) { while ((iIndex < getLength()) && (getAt(iIndex) == ',')) { iIndex++; } if ((iIndex < getLength()) && (!bQuotedWord)) { if (getAt(iIndex) == '"') {bQuotedWord = true; iIndex++;} if (iIndex < getLength()) { iWordFound++; } } while ((iIndex < getLength()) && (getAt(iIndex) != ',')) { iIndex++; } if (((iIndex - 1) < getLength()) 
&& (getAt(iIndex - 1) == '"')) {bQuotedWord = false;} } return iWordFound; } AffxByteArray AffxByteArray::getParameter(int iWordIndex) { int iIndex = 0; int iOffset = 0; int iWordFound = 0; bool bQuotedWord = false; AffxByteArray ba; ba.setSize(0); iIndex = iOffset; while ((iIndex < getLength()) && (iWordFound < iWordIndex)) { while ((iIndex < getLength()) && (m_pData[iIndex] <= 32)) iIndex++; if ((iIndex < getLength()) && (!bQuotedWord)) { if (getAt(iIndex) == '"') {bQuotedWord = true; iIndex++;} ba.setSize(0); if (iIndex < getLength()) {iWordFound++;} } if (!bQuotedWord) { while ((iIndex < getLength()) && (m_pData[iIndex] > 32)) { ba.append(m_pData[iIndex]); iIndex++; } } else { while (iIndex < getLength()) { if (((iIndex + 1) < getLength()) && (getAt(iIndex) == '"') && (getAt(iIndex + 1) <= 32)) {bQuotedWord = false; iIndex++; break;} if (((iIndex + 1) == getLength()) && (getAt(iIndex) == '"')) {bQuotedWord = false; iIndex++; break;} ba.append(m_pData[iIndex]); iIndex++; } } } if (iWordFound < iWordIndex) ba.setSize(0); return ba; } AffxByteArray AffxByteArray::getCsvParameter(int iWordIndex) { int iIndex = 0; int iOffset = 0; int iWordFound = 0; bool bQuotedWord = false; AffxByteArray ba; ba.setSize(0); iIndex = iOffset; while ((iIndex < getLength()) && (iWordFound < iWordIndex)) { while ((iIndex < getLength()) && (m_pData[iIndex] == ',')) iIndex++; if ((iIndex < getLength()) && (!bQuotedWord)) { if (getAt(iIndex) == '"') {bQuotedWord = true; iIndex++;} ba.setSize(0); if (iIndex < getLength()) {iWordFound++;} } if (!bQuotedWord) { while ((iIndex < getLength()) && (m_pData[iIndex] != ',')) { ba.append(m_pData[iIndex]); iIndex++; } } else { while (iIndex < getLength()) { if (((iIndex + 1) < getLength()) && (getAt(iIndex) == '"') && (getAt(iIndex + 1) == ',')) {bQuotedWord = false; iIndex++; break;} if (((iIndex + 1) == getLength()) && (getAt(iIndex) == '"')) {bQuotedWord = false; iIndex++; break;} ba.append(m_pData[iIndex]); iIndex++; } } } if (iWordFound < 
iWordIndex) ba.setSize(0); return ba; } int AffxByteArray::getWordCount() { int iIndex = 0; int iOffset = 0; int iWordFound = 0; iIndex = iOffset; while ((iIndex < getLength())) { while ((iIndex < getLength()) && (getAt(iIndex) <= ' ')) { iIndex++; } if (iIndex < getLength()) { iWordFound++; } while ((iIndex < getLength()) && (getAt(iIndex) > ' ')) { iIndex++; } } return iWordFound; } AffxByteArray AffxByteArray::getWord(int iWordIndex, AffxByteArray& ba) { int iIndex = 0; int iOffset = 0; int iWordFound = 0; ba.setSize(0); iIndex = iOffset; while ((iIndex < getLength()) && (iWordFound < iWordIndex)) { while ((iIndex < getLength()) && (m_pData[iIndex] <= 32)) iIndex++; ba.setSize(0); if (iIndex < getLength()) iWordFound++; while ((iIndex < getLength()) && (m_pData[iIndex] > 32)) { ba.append(m_pData[iIndex]); iIndex++; } } if (iWordFound < iWordIndex) ba.setSize(0); return ba; } AffxByteArray AffxByteArray::nextColumn(AffxByteArray& ba) { int iLength = 0; ba.setSize(255); int iStart = m_iOffset; int iIndex = m_iOffset; if (getSize() > 0) { int i = 0; while ((iIndex < getSize()) && ((i = (int)m_pData[iIndex]) != 9)) { iIndex++; } iLength = iIndex - iStart; m_iOffset += (iLength + 1); nextColumn(iStart, iIndex, iLength, ba); } ba.m_iFieldOffset = 0; return ba; } bool AffxByteArray::nextColumn(int iStart, int iIndex, int iLength, AffxByteArray& ba) { bool bSuccessful = false; if ((iIndex >= getSize()) && (iLength == 0)) { ba.setSize(0); } else { bSuccessful = true; ba.setSize(iLength); memcpy((void*)(ba.m_pData), (void*)(m_pData + iStart), iLength); } return bSuccessful; } AffxByteArray AffxByteArray::nextField(AffxByteArray& ba) { int iLength = 0; ba.setSize(255); int iStart = m_iFieldOffset; int iIndex = m_iFieldOffset; if (getSize() > 0) { int i = 0; while ((iIndex < getSize()) && ((i = (int)m_pData[iIndex]) != ',')) { iIndex++; } iLength = iIndex - iStart; m_iFieldOffset += (iLength + 1); nextField(iStart, iIndex, iLength, ba); } return ba; } bool 
AffxByteArray::nextField(int iStart, int iIndex, int iLength, AffxByteArray& ba) { bool bSuccessful = false; if ((iIndex >= getSize()) && (iLength == 0)) { ba.setSize(0); } else { bSuccessful = true; ba.setSize(iLength); memcpy((void*)(ba.m_pData), (void*)(m_pData + iStart), iLength); } return bSuccessful; } // This function is used by AffxArray for sorting and searching. int AffxByteArray::compareTo(const AffxByteArray &obj, int iCompareCode) const { int iCompareResult = 0; int iThis = 0; int iThat = 0; switch(iCompareCode) { case 0: iCompareResult = compareTo(obj); break; case 1: iThis = parseInt(); iThat = obj.parseInt(); if (iThis > iThat) iCompareResult = 1; else if (iThis < iThat) iCompareResult = -1; else iCompareResult = 0; break; } return iCompareResult; } int AffxByteArray::compareTo(const AffxByteArray & that) const { int iResult = 0; int iLength = __minimum(this->m_nSize, that.m_nSize); bool bEqual = true; for (int iIndex = 0; (iIndex < iLength); iIndex++) { if (this->m_pData[iIndex] != that.m_pData[iIndex]) { bEqual = false; iResult = (this->m_pData[iIndex] - that.m_pData[iIndex]); break; } } if (bEqual) {iResult = (this->m_nSize - that.m_nSize);} return iResult; } int AffxByteArray::compareTo(const AffxString & that) const { int iResult = 0; int iLength = __minimum(this->m_nSize, (int)that.length()); bool bEqual = true; for (int iIndex = 0; (iIndex < iLength); iIndex++) { if (this->m_pData[iIndex] != that.charAt(iIndex)) { bEqual = false; iResult = (this->m_pData[iIndex] - that.charAt(iIndex)); break; } } if (bEqual) {iResult = (this->m_nSize - (int)that.length());} return iResult; } void AffxByteArray::replace(char c1, char c2) { for (int iIndex = 0; (iIndex < getLength()); iIndex++) { if (getAt(iIndex) == c1) {setAt(iIndex, c2);} } } void AffxByteArray::replace(const AffxString& str1, const AffxString& str2) { AffxString str1Temp = str1; AffxByteArray ba; int iIndex = indexOf(str1Temp); while (iIndex != -1) { ba.setSize(0); ba.append(substring(0, 
iIndex)); ba.append(str2); ba.append(substring(iIndex + (int)str1Temp.length())); assign(ba.toString()); iIndex = indexOf(str1Temp); } } bool AffxByteArray::isAllWhitespace() { bool bAllWhitespace = true; for (int i = 0; (i < getLength()); i++) { if (getAt(i) > ' ') {bAllWhitespace = false; break;} } return bAllWhitespace; } int AffxByteArray::getMaxRun(char byBase) { int iCurrentRun = 0; int iMaxRun = 0; for (int iBaseIndex = 0; (iBaseIndex < getLength()); iBaseIndex++) { if (getAt(iBaseIndex) == byBase) {iCurrentRun++;} else { if (iCurrentRun > iMaxRun) {iMaxRun = iCurrentRun;} if (getAt(iBaseIndex) == byBase) {iCurrentRun = 1;} else {iCurrentRun = 0;} } } if (iCurrentRun > iMaxRun) {iMaxRun = iCurrentRun;} return (iMaxRun); } int AffxByteArray::getCountInWindow(char byBase, int iWindowSize) { int iBestCount = 0; int iCount = 0; iWindowSize = __minimum(iWindowSize, getLength()); for (int iBaseIndex = 0; (iBaseIndex <= (getLength() - iWindowSize)); iBaseIndex++) { iCount = 0; for (int iWindowOffset = 0; (iWindowOffset < iWindowSize); iWindowOffset++) { if (getAt(iBaseIndex + iWindowOffset) == byBase) {iCount++;} } if (iCount > iBestCount) {iBestCount = iCount;} } return (iBestCount); } int AffxByteArray::getCount(char byBase) { int iCount = 0; for (int iBaseIndex = 0; (iBaseIndex < getLength()); iBaseIndex++) { if (getAt(iBaseIndex) == byBase) {iCount++;} } return (iCount); } // Read a the entire file into a byte array. 
bool AffxByteArray::readFile(const AffxString& strFileName) { std::ifstream file; bool bSuccessful = false; unsigned int uiNumberBytes = Fs::fileSize(strFileName); Fs::aptOpen(file, strFileName); setSize(uiNumberBytes); if (uiNumberBytes > 0) { memset(getData(), 0, uiNumberBytes); file.read(getData(), uiNumberBytes); bSuccessful = true; } file.close(); setOffset(0); return bSuccessful; } affxparser/src/fusion/util/AffxByteArray.h0000644000175200017520000001303314516003651021665 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxByteArray_h_ #define _AffxByteArray_h_ /** * @file AffxByteArray.h * * @brief This header contains the AffxByteArray class definition. */ #include "util/AffxConv.h" #include "util/AffxString.h" // /** * @brief A class for managing large byte arrays. 
* */ class AffxByteArray { public: void lockBuffer(void) {m_bLocked = true;} void unlockBuffer(void) {m_bLocked = false;} bool isLocked(void) {return m_bLocked;} void clear(void); void empty(void); bool isEmpty(void) {return (m_nMaxSize == 0);} bool isClear(void) {return (m_nSize == 0);} const AffxByteArray& operator=(const AffxByteArray& ba); int getInt(int &iIndex); bool isAllWhitespace(); void replace(const AffxString& str1, const AffxString& str2); void replace(char c1, char c2); // Construction AffxByteArray(); AffxByteArray(const AffxByteArray& ba); AffxByteArray(const AffxString& ba); // Attributes int getAllocLength() const; int getSize() const; int getUpperBound() const; void setSize(int nNewSize, int nGrowBy = -1); // Operations // Clean up void freeExtra(); void removeAll(); // Accessing elements char getAt(int nIndex) const; void setAt(int nIndex, char newElement); char& elementAt(int nIndex); // Direct Access to the element data (may return NULL) const char* getData() const; char* getData(); // Potentially growing the array void setAtGrow(int nIndex, char newElement); int add(char newElement); int append(const AffxByteArray& src); int append(const AffxString& src); int append(int i) {return append(::getInt(i));} int append(unsigned int i) {return append(::getUnsignedInt(i));} int append(double d) {return append(::getDouble(d, 6));} AffxByteArray& append(char bytes[], int offset, int len); void copy(const AffxByteArray& src); void assign(const AffxString& str) {setSize(0); append(str);} // overloaded operator helpers char operator[](int nIndex) const; char& operator[](int nIndex); // Operations that move elements around void insertAt(int nIndex, char newElement, int nCount = 1); void removeAt(int nIndex, int nCount = 1); void insertAt(int nStartIndex, AffxByteArray* pNewArray); bool operator==(const AffxByteArray &ba); // bool operator==(AffxByteArray& ba) {return operator==(ba);} bool operator!=(const AffxByteArray& ba) {return !(operator==(ba));} void 
copy(int iDestStart, const AffxByteArray& src, int iSrcStart, int iLength); void setOffset(int iOffset); bool getLine(AffxByteArray& ba); int getColumnCount(); AffxByteArray& getColumn(int iColumnNumber, AffxByteArray& ba); int parseInt() const; char parseChar() const; bool parsebool() const; double parseDouble() const; AffxByteArray& trim(); AffxByteArray& trimTabs(); AffxString toString() const; bool startsWith(const AffxString& strCompare); AffxString substring(int iIndex, int iEndIndex = -1); AffxByteArray& toLowerCase(); AffxByteArray& toUpperCase(); void trimInternal(); AffxString firstWord(int iOffset = 0); AffxByteArray reverseComplement(); bool equals(const AffxString &str) const; bool equalsIgnoreCase(const AffxString &str); int length() const {return getSize();} int getLength() const {return getSize();} bool nextLine(AffxByteArray& ba); int getWordCount(); AffxString getWord(int iWordIndex); int parameterCount(); AffxByteArray getParameter(int iParameterIndex); int csvParameterCount(); AffxByteArray getCsvParameter(int iParameterIndex); int append(char by); AffxByteArray getWord(int iWordIndex, AffxByteArray& ba); int indexOf(const AffxString& str) {return toString().indexOf(str);} bool isValid() {return isValid(false);} bool isValid(bool bParameterFile); AffxByteArray nextColumn(AffxByteArray& ba); int compareTo(const AffxString &that) const ; int compareTo(const AffxByteArray &that) const; int compareTo(const AffxByteArray &obj, int iCompareCode) const; int getMaxRun(char byBase); int getCountInWindow(char byBase, int iWindowSize); int getCount(char byBase); AffxByteArray nextField(AffxByteArray& ba); bool readFile(const AffxString& strFileName); // Implementation protected: char* m_pData; // the actual array of data int m_nSize; // # of elements (upperBound - 1) int m_nMaxSize; // max allocated int m_nGrowBy; // grow amount bool m_bLocked; int m_iOffset; int m_iFieldOffset; public: ~AffxByteArray(); #ifdef _DEBUG void _ASSERTEValid() const; #endif 
protected: // local typedefs for class templates typedef char BASE_TYPE; typedef char BASE_ARG_TYPE; private: bool getLine(int iStart, int iIndex, int iLength, AffxByteArray& ba, bool bRemoveLastByte); bool nextColumn(int iStart, int iIndex, int iLength, AffxByteArray& ba); bool nextField(int iStart, int iIndex, int iLength, AffxByteArray& ba); }; #endif // _AffxByteArray_h_ affxparser/src/fusion/util/AffxConv.cpp0000644000175200017520000002452714516003651021235 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file AffxConv.cpp * * @brief This file contains some data conversion routines. */ /** * @brief Some data conversion routines. * */ // #include "util/AffxConv.h" // #include "util/Convert.h" // #include #include #include #include #include #include // #ifdef WIN64 # undef WIN32 #endif using namespace std; /** * Converts a string with commas into an int. * @param str - The string to convert. * @return - int value. 
*/ int getInt(const std::string& str) { std::string strInt; int iLength = (int)str.length(); for (int i = 0; (i < iLength); i++) { if (str[i] != ',') {strInt += str[i];} } return atol(strInt.c_str()); } /** * Converts a string with commas into an unsigned int. * @param str - The string to convert. * @return - unsigned int value. */ unsigned int getUnsignedInt(const std::string& str) { return (unsigned int)getDouble(str); } /** * Converts a string with commas into a double. * @param str - The string to convert. * @return - double value. */ double getDouble(const std::string& str) { std::string strDouble; int iLength = (int)str.length(); for (int i = 0; (i < iLength); i++) { if (str[i] != ',') {strDouble += str[i];} } return atof(strDouble.c_str()); } /** * Converts an int to a string. * @param i - The int to convert. * @param iLength - The length of the output string. * @return - string value. */ std::string getInt(int i, unsigned int iLength) { std::string str = getInt(i); while (str.length() < iLength) {str = "0" + str;} return str; } /** * Converts an int to a string. * @param i - The int to convert. * @param bCommas - true if you want commas in the string, else false (defaults to false). * @return - string value. */ std::string getInt(int i, bool bCommas) { char szValue[64]; #if defined(WIN32) && !defined(__MINGW32__) sprintf_s(szValue, 64, "%d", i); #else sprintf(szValue, "%d", i); #endif if (i != i) {return "nan";} if (!bCommas) {return szValue;} return formatString(szValue, 0, bCommas); } /** * Converts an int64 to a string. * @param l - The int64 to convert. * @param bCommas - true if you want commas in the string, else false (defaults to false). * @return - string value. */ /* std::string getLong64(__int64 l, bool bCommas) { char szValue[64]; #ifdef WIN32 _i64toa_s(l, szValue, 64, 10); #else _i64toa(l, szValue, 10); #endif return convertString(szValue, 0, bCommas); } */ /** * Converts an unsigned int64 to a string. 
* @param l - The unsigned int64 to convert. * @param bCommas - true if you want commas in the string, else false (defaults to false). * @return - string value. */ /* std::string getUnsignedLong64(unsigned __int64 l, bool bCommas) { char szValue[64]; #ifdef WIN32 _ui64toa_s(l, szValue, 64, 10); #else _ui64toa(l, szValue, 10); #endif return convertString(szValue, 0, bCommas); } */ /** * Converts an unsigned int to a string. * @param i - The unsigned int to convert. * @param bCommas - true if you want commas in the string, else false (defaults to false). * @return - string value. */ std::string getUnsignedInt(unsigned int i, bool bCommas) { char szValue[64]; #ifdef WIN32 sprintf_s(szValue, 64, "%u", i); #else sprintf(szValue, "%u", i); #endif if (!bCommas) {return szValue;} return formatString(szValue, 0, bCommas); } /** * Converts a double to a string. * @param d - The double to convert. * @return - string value. */ std::string getDouble(double d) { std::string str = ToStr(d); if (str == "-inf") return str; if (str == "inf") return str; if (str == "nan") return str; char szValue[64]; #ifdef WIN32 sprintf_s(szValue, 64, "%.16g", d); #else sprintf(szValue, "%.16g", d); #endif return szValue; } /** * Converts a double to a string. * @param d - The double to convert. * @param iDecimalPlaces - The number of decimal places (double will be rounded). * @param bCommas - true if you want commas in the string, else false (defaults to false). * @return - string value. */ std::string getDouble(double d, int iDecimalPlaces, bool bCommas) { d = roundDouble(d, iDecimalPlaces); std::string str = ToStr(d); if (str == "-inf") return str; if (str == "inf") return str; if (str == "nan") return str; char szValue[64]; #ifdef WIN32 sprintf_s(szValue, 64, "%.16g", d); #else sprintf(szValue, "%.16g", d); #endif if (d != d) {return "nan";} return formatString(szValue, iDecimalPlaces, bCommas); } /** * Formats a number string * @param szValue - The const char* to convert. 
* @param iDecimalPlaces - The number of decimal places. * @param bCommas - true if you want commas in the string, else false (defaults to false). * @return - string value. */ std::string formatString(const char* szValue, int iDecimalPlaces, bool bCommas) { char szBuffer[64]; int iValue, iBuffer, iComma; int iCount, iMod; int i, iLeftofDecimal; iValue = 0, iBuffer = 0, iComma = 0; iCount = (int)(strlen(szValue)); iLeftofDecimal = 0; for (i = 0; (i < iCount); i++) { if ((szValue[i] == 'e') || (szValue[i] == 'E')) { // The number is probably in scientific notation, return as is. return szValue; } } for (i = 0; (i < iCount); i++) { if (szValue[i] == '.') {break;} else {iLeftofDecimal++;} } if (szValue[iValue] == '-') { iMod = (iLeftofDecimal - 1) % 3; szBuffer[iBuffer] = '-'; iValue++; iBuffer++; } else {iMod = iLeftofDecimal % 3;} for (iComma = 0; (iComma < iMod); iComma++) { szBuffer[iBuffer] = szValue[iValue]; iBuffer++; iValue++; } if ((iMod > 0) && (iValue < iLeftofDecimal)) { if (bCommas) { szBuffer[iBuffer] = ','; iBuffer++; } } iComma = 0; bool bDecimal = false; int iNumberPlaces = 0; for (; (iValue < iCount); iValue++) { if (szValue[iValue] == '.') {bDecimal = true;} if ((iComma == 3) && (!bDecimal)) { iComma = 0; if (bCommas) { szBuffer[iBuffer] = ','; iBuffer++; } } if ((bDecimal) && (iDecimalPlaces == 0)) {break;} szBuffer[iBuffer] = szValue[iValue]; iBuffer++; iComma++; if (bDecimal) { iNumberPlaces++; if (iNumberPlaces > iDecimalPlaces) {break;} } } szBuffer[iBuffer] = '\0'; std::string str = szBuffer; int iFindIndex = (int)str.find("."); if (iFindIndex != -1) { for (int iIndex = ((int)str.length() - 1); (iIndex >= iFindIndex); iIndex--) { if ((szBuffer[iIndex] == '0') || (szBuffer[iIndex] == '.')) {szBuffer[iIndex] = 0;} else {break;} } } if ((szBuffer[0] == '-') && (szBuffer[1] == '0') && (szBuffer[2] == 0)) { szBuffer[0] = '0'; szBuffer[1] = 0; } return szBuffer; } /** * Converts Month int to Month string. * @param iMonth - The Month number. 
* @return - string value. */ std::string getMonth(int iMonth) { std::string str; std::string strMonths = "JanFebMarAprMayJunJulAugSepOctNovDec"; if ((iMonth >= 1) && (iMonth <= 12)) { iMonth -= 1; iMonth = iMonth * 3; str = strMonths.substr(iMonth, 3); } return str; } /** * Converts Month string to Month int. * @param strMonth - The Month string. * @return - int value. */ int getMonth(const std::string& strMonth) { int i; std::string strMonths = "JanFebMarAprMayJunJulAugSepOctNovDec"; if (strMonth == "") {i = 0;} else { i = (int)strMonths.find(strMonth); if (i == -1) {i = 0;} else {i = (i / 3) + 1;} } return i; } /** * Round the double and return it. * @param d - The double to round. * @param iDecimalPlaces - The number of decimal places to round to. * @return - rounded double value. */ double roundDouble(double val,int places) { double v_int,v_frac; double place_10,v_int10,v_frac10; // split into parts. v_frac=modf(val,&v_int); // create our shift place_10=pow(10.0,places); // shift the frac part up and truncate v_frac10=modf(v_frac*place_10,&v_int10); // std-c++ round says that it rounds away from zero. // apply the rounding of the last digit. if (v_frac10>=0.5) { v_int10=v_int10+1.0; } else if (v_frac10<=-0.5) { v_int10=v_int10-1.0; } // shift fract back down. v_frac=v_int10/place_10; // put back together. return (v_int+v_frac); } /** * Check for special characters is a string using in a SQL WHERE xxx LIKE xxx clause. * @param str - The string to check. * @return - The formatted string. 
*/ std::string likeString(const std::string& str) { char c; unsigned int i = 0; std::string strLike; for(i = 0; (i < str.length()); i++) { c = str[i]; // Check for a single quote (ascii 39), put an extra quote if it is found if (c == 39) { strLike += "'"; strLike += c; } // to avoid the error message 'FOR UPDATE clause allowed only for DECLARE CURSOR', // do not use the LIKE clause for search strings that contain parentheses // make partial filter using right and left side around parentheses else if ((c == '(') || (c == ')')) { strLike += '_'; } // LIKE uses wildcards: % = any number of chars, _ = one char, [ = start of literal block // First copy string, enclosing wildcards in square brackets, treating them as literals // Also check for a single quote (ascii 39), put an extra quote if it is found else if ((c == '%') || (c == '_') || (c == '[')) { strLike += '['; strLike += c; strLike += ']'; } else { strLike += c; } } return strLike; } affxparser/src/fusion/util/AffxConv.h0000644000175200017520000000362514516003651020676 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file AffxConv.h * * @brief This header contains some data conversion routines. 
*/ /** * @brief Some data conversion routines. * */ #ifndef __affxconv_h__ #define __affxconv_h__ #include #include // // NOTE: for visual access purposes only class AffxConv { public: AffxConv(); }; std::string getUnsignedInt(unsigned int i, bool bCommas = false); std::string getInt(int i, bool bCommas = false); int getInt(const std::string& str); unsigned int getUnsignedInt(const std::string& str); std::string getInt(int i, unsigned int iLength); std::string getDouble(double d); std::string getDouble(double d, int iDecimalPlaces, bool bCommas = false); double getDouble(const std::string& strDouble); std::string formatString(const char* szValue, int iDecimalPlaces, bool bCommas = false); std::string getMonth(int iMonth); int getMonth(const std::string& strMonth); double roundDouble(double d, int iDecimalPlaces = 0); std::string likeString(const std::string& str); #endif // __affxconv_h__ affxparser/src/fusion/util/AffxMultiDimensionalArray.h0000644000175200017520000010625614516003651024251 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxMultiDimensionalArray_h_ #define _AffxMultiDimensionalArray_h_ /** * @file AffxMultiDimensionalArray.h * * @brief This header contains the AffxMultiDimensionalArray class definition. */ #include "file/TsvFile/TsvFile.h" #include "util/AffxArray.h" #include "util/Err.h" #include "util/Util.h" // #include "portability/affy-base-types.h" #include #include #include // using namespace std; /** * @brief A class for managing one, two, or three dimensional arrays of primitive data types. * */ template class AffxMultiDimensionalArray { private: TYPE* m_ar; SIZE m_iXDimension; SIZE m_iYDimension; SIZE m_iZDimension; // to make nth_element() calls look nicer TYPE* begin() { return m_ar; } TYPE* end() { return m_ar + m_iXDimension; } public: static double infinity; TYPE* getPointer() {mustBeVector(); return m_ar;} SIZE getXDimension() {return m_iXDimension;} SIZE getYDimension() {return m_iYDimension;} SIZE getZDimension() {return m_iZDimension;} void convertMatrixToVector() {m_iXDimension = m_iXDimension * m_iYDimension; m_iYDimension = 1;} SIZE length() {return m_iXDimension;} bool isVector() {return ((m_iXDimension > 0) && (m_iYDimension == 1) && (m_iZDimension == 1));} bool isMatrix() {return ((m_iXDimension > 0) && (m_iYDimension > 1) && (m_iZDimension == 1));} void increment(SIZE x) {m_ar[x]++;} AffxMultiDimensionalArray() { m_iXDimension = 1; m_iYDimension = 1; m_iZDimension = 1; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} initialize(); } AffxMultiDimensionalArray(SIZE iXDimension) { m_iXDimension = iXDimension; m_iYDimension = 1; m_iZDimension = 1; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of 
memory.");} initialize(); } AffxMultiDimensionalArray(SIZE iXDimension, SIZE iYDimension) { m_iXDimension = iXDimension; m_iYDimension = iYDimension; m_iZDimension = 1; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} initialize(); } AffxMultiDimensionalArray(SIZE iXDimension, SIZE iYDimension, SIZE iZDimension) { m_iXDimension = iXDimension; m_iYDimension = iYDimension; m_iZDimension = iZDimension; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} initialize(); } AffxMultiDimensionalArray(const AffxMultiDimensionalArray& that) { m_iXDimension = that.m_iXDimension; m_iYDimension = that.m_iYDimension; m_iZDimension = that.m_iZDimension; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} memcpy(m_ar, that.m_ar, (getCount() * sizeof(TYPE))); } void initialize(SIZE iXDimension) { if (m_ar != 0) { delete[] m_ar; m_ar = 0; } m_iXDimension = iXDimension; m_iYDimension = 1; m_iZDimension = 1; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} initialize(); } void initialize(SIZE iXDimension, SIZE iYDimension) { if (m_ar != 0) { delete[] m_ar; m_ar = 0; } m_iXDimension = iXDimension; m_iYDimension = iYDimension; m_iZDimension = 1; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} initialize(); } void initialize(SIZE iXDimension, SIZE iYDimension, SIZE iZDimension) { if (m_ar != 0) { delete[] m_ar; m_ar = 0; } m_iXDimension = iXDimension; m_iYDimension = iYDimension; m_iZDimension = iZDimension; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} initialize(); } const AffxMultiDimensionalArray& operator=(AffxMultiDimensionalArray& that) { if (this == &that) return that; if (m_ar != 0) { delete[] m_ar; m_ar = 0; } m_iXDimension = that.m_iXDimension; m_iYDimension = that.m_iYDimension; m_iZDimension = that.m_iZDimension; m_ar = new TYPE[getCount()]; 
if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} memcpy(m_ar, that.m_ar, (getCount() * sizeof(TYPE))); return *this; } virtual ~AffxMultiDimensionalArray() { clear(); } void clear() { if (m_ar != 0) { delete[] m_ar; m_ar = 0; m_iXDimension = 0; m_iYDimension = 0; m_iZDimension = 0; } } static void defineInfinity() { double d1 = 1.0; double d2 = 0.0; infinity = (d1 / d2); } bool isAllInfinity() { bool b = true; for (SIZE iIndex = 0; (iIndex < length()); iIndex++) { if (get(iIndex) != infinity) { b = false; break; } } return b; } SIZE getCount() { return (m_iXDimension * m_iYDimension * m_iZDimension); } SIZE compareTo(AffxMultiDimensionalArray& that) { SIZE iResult = 0; SIZE iLength = min(getCount(), that.getCount()); bool bEqual = true; for (SIZE iIndex = 0; (iIndex < iLength); iIndex++) { if (m_ar[iIndex] != that.m_ar[iIndex]) { bEqual = false; iResult = (m_ar[iIndex] - that.m_ar[iIndex]); break; } } if (bEqual) {iResult = (getCount() - that.getCount());} return iResult; } bool equals(AffxMultiDimensionalArray& that) { bool bEquals = false; if (getCount() == that.getCount()) { for (SIZE i = 0; (i < getCount()); i++) { bEquals = true; if (m_ar[i] != that.m_ar[i]) { bEquals = false; break; } } } return bEquals; } void initialize() { memset(m_ar, 0, getCount() * sizeof(TYPE)); } void set_unchecked(SIZE x, TYPE t) { m_ar[x] = t; } void set(SIZE x, TYPE t) { SIZE iIndex = x; if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::set(x, TYPE) index out of bounds. " + ::getInt(x));} m_ar[iIndex] = t; } void* getPointer(SIZE x) { SIZE iIndex = x; if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::getPointer(x) index out of bounds. " + ::getInt(x));} return &m_ar[x]; } void set(SIZE x, SIZE y, TYPE t) { SIZE iIndex = ((x * m_iYDimension) + y); if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::set(x, y, TYPE) index out of bounds. 
" + ::getInt(x) + ", " + ::getInt(y));} m_ar[iIndex] = t; } void* getPointer(SIZE x, SIZE y) { SIZE iIndex = ((x * m_iYDimension) + y); if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::getPointer(x, y) index out of bounds. " + ::getInt(x) + ", " + ::getInt(y));} return &m_ar[iIndex]; } void set(SIZE x, SIZE y, SIZE z, TYPE t) { SIZE iIndex = ((x * m_iYDimension * m_iZDimension) + (y * m_iZDimension) + z); if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::set(x, y, z, TYPE) index out of bounds. " + ::getInt(x) + ", " + ::getInt(y) + ", " + ::getInt(z));} m_ar[iIndex] = t; } TYPE get(SIZE x) { SIZE iIndex = x; if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::get(x) index out of bounds. " + ::getInt(x));} return m_ar[iIndex]; } TYPE get_unchecked(SIZE x) { return m_ar[x]; } TYPE get(SIZE x, SIZE y) { SIZE iIndex = ((x * m_iYDimension) + y); if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::get(x, y) index out of bounds. " + ::getInt(x) + ", " + ::getInt(y));} return m_ar[iIndex]; } TYPE get(SIZE x, SIZE y, SIZE z) { SIZE iIndex = ((x * m_iYDimension * m_iZDimension) + (y * m_iZDimension) + z); if ((iIndex < 0) || (iIndex >= getCount())) {Err::errAbort("AffxMultiDimensionalArray::get(x, y, z) index out of bounds. 
" + ::getInt(x) + ", " + ::getInt(y) + ", " + ::getInt(z));} return m_ar[iIndex]; } void mustBeVector() { if (!isVector()) { // AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector.")); } } void insertAt(SIZE x, TYPE t) { mustBeVector(); AffxMultiDimensionalArray arTemp(*this); if (m_ar != 0) { delete[] m_ar; m_ar = 0; } m_iXDimension++; m_ar = new TYPE[getCount()]; if (m_ar == NULL) {Err::errAbort("Run is out of memory.");} initialize(); if (x > 0) {memcpy(m_ar, arTemp.getPointer(), (x * sizeof(TYPE)));} memcpy((m_ar + x), &t, sizeof(TYPE)); if (x < arTemp.length()) {memcpy((m_ar + x + 1), (arTemp.getPointer() + x), ((arTemp.length() - x) * sizeof(TYPE)));} } void removeAt(SIZE x) { if (length() < 1) return; mustBeVector(); if (x < (length() - 1)) { memmove((m_ar + x), (m_ar + x + 1), ((m_iXDimension - x - 1) * sizeof(TYPE))); } m_iXDimension--; } AffxMultiDimensionalArray unique() { mustBeVector(); AffxMultiDimensionalArray ar(*this); for (SIZE iIndex = 1; (iIndex < ar.length()); iIndex++) { for (SIZE iIndex2 = 0; (iIndex2 < iIndex); iIndex2++) { if (ar.get(iIndex) == ar.get(iIndex2)) { ar.removeAt(iIndex); iIndex--; break; } } } return ar; } AffxMultiDimensionalArray diff() { mustBeVector(); // if (length() < 2) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function diff(int) must operate on a vector with more than one element in it."));} AffxMultiDimensionalArray ar(this->m_iXDimension - 1); for (SIZE iIndex = 1; (iIndex < length()); iIndex++) { ar.set((iIndex - 1), get(iIndex) - get(iIndex - 1)); } return ar; } AffxMultiDimensionalArray rev() { mustBeVector(); AffxMultiDimensionalArray ar(this->length()); for (SIZE iIndex = (length() - 1); (iIndex >= 0); iIndex--) { ar.set(((length() - 1) - iIndex), get(iIndex)); } return ar; } void calculatePointEstimates(double& dPointEstimate, double& dPointEstimateLower, double& dPointEstimateUpper) { if (nonZeroLength() == 0) {dPointEstimate = 0.0; 
dPointEstimateLower = 0.0; dPointEstimateUpper = 0.0; return;} if (mean() == 0.0) {dPointEstimate = 0.0; dPointEstimateLower = 0.0; dPointEstimateUpper = 0.0; return;} else if (round(mean(), 4) == 1.0) {dPointEstimate = 1.0; dPointEstimateLower = 1.0; dPointEstimateUpper = 1.0; return;} else { AffxMultiDimensionalArray ar(length()); for (SIZE iIndex = 0; (iIndex < ar.length()); iIndex++) { double d = get(iIndex); if (d == 0.0) {d = d + 0.0001;} if (d == 1.0) {d = d - 0.0001;} ar.set(iIndex, log(d/(1-d))); } double dMean = ar.mean(); double dSd = ar.sd(); double dMeanUpper = dMean+2*dSd/sqrt((double)ar.length()); double dMeanLower = dMean-2*dSd/sqrt((double)ar.length()); dPointEstimateUpper = exp(dMeanUpper)/(1+exp(dMeanUpper)); dPointEstimate = exp(dMean)/(1+exp(dMean)); dPointEstimateLower = exp(dMeanLower)/(1+exp(dMeanLower)); } } SIZE nonZeroLength() { SIZE iCount = 0; for (SIZE iIndex = 0; (iIndex < length()); iIndex++) { if (get(iIndex) != 0.0) {iCount++;} } return iCount; } double mean() { double dMean = 0.0; mustBeVector(); if (length() > 0) { double dSum = 0.0; for (SIZE iIndex = 0; (iIndex < length()); iIndex++) { dSum += get(iIndex); } dMean = dSum / (double)length(); } return dMean; } double mean(SIZE iLength) { double dMean = 0.0; mustBeVector(); if (iLength > 0) { double dSum = 0.0; for (SIZE iIndex = 0; (iIndex < iLength); iIndex++) { dSum += get(iIndex); } dMean = dSum / (double)iLength; } return dMean; } void logMean(bool bXDimension, bool bYDimension, AffxMultiDimensionalArray& vect) { vect.initialize(); if ((!bXDimension) && (!bYDimension)) {return;} // if ((bXDimension) && (bYDimension)) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector."));} if (bXDimension) { // if (vect.length() != m_iXDimension) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector of the appropriate size."));} for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { for (SIZE iYIndex 
= 0; (iYIndex < m_iYDimension); iYIndex++) { vect.set(iXIndex, (TYPE)(vect.get(iXIndex) + log(get(iXIndex, iYIndex)))); } } for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { vect.set(iXIndex, (vect.get(iXIndex) / m_iYDimension)); } } else if (bYDimension) { // if (vect.length() != m_iYDimension) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector of the appropriate size."));} for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vect.set(iYIndex, (TYPE)(vect.get(iYIndex) + log(get(iXIndex, iYIndex)))); } } for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vect.set(iYIndex, (vect.get(iYIndex) / m_iXDimension)); } } } void logVar(bool bXDimension, bool bYDimension, AffxMultiDimensionalArray& vectMeans, AffxMultiDimensionalArray& vectVariances) { vectMeans.initialize(); vectVariances.initialize(); if ((!bXDimension) && (!bYDimension)) {return;} // if ((bXDimension) && (bYDimension)) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector."));} if (bXDimension) { // if (vectMeans.length() != m_iXDimension) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector of the appropriate size."));} // if (vectVariances.length() != m_iXDimension) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector of the appropriate size."));} // Calculate Means. for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vectMeans.set(iXIndex, (TYPE)(vectMeans.get(iXIndex) + log(get(iXIndex, iYIndex)))); } } for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { vectMeans.set(iXIndex, (vectMeans.get(iXIndex) / m_iYDimension)); } // Calculate variances. 
for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vectVariances.set(iXIndex, (TYPE)(vectVariances.get(iXIndex) + ((log(get(iXIndex, iYIndex)) - vectMeans.get(iXIndex)) * (log(get(iXIndex, iYIndex)) - vectMeans.get(iXIndex))))); } } for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { vectVariances.set(iXIndex, (vectVariances.get(iXIndex) / (m_iYDimension - 1))); } } else if (bYDimension) { // if (vectMeans.length() != m_iYDimension) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector of the appropriate size."));} // if (vectVariances.length() != m_iYDimension) {AffxLog::throwable(new Exception("AffxMultidimensionalArray function must operate on a vector of the appropriate size."));} // Calculate Means. for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vectMeans.set(iYIndex, (TYPE)(vectMeans.get(iYIndex) + log(get(iXIndex, iYIndex)))); } } for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vectMeans.set(iYIndex, (vectMeans.get(iYIndex) / m_iXDimension)); } // Calculate variances. 
for (SIZE iXIndex = 0; (iXIndex < m_iXDimension); iXIndex++) { for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vectVariances.set(iYIndex, (TYPE)(vectVariances.get(iYIndex) + ((log(get(iXIndex, iYIndex)) - vectMeans.get(iYIndex)) * (log(get(iXIndex, iYIndex)) - vectMeans.get(iYIndex))))); } } for (SIZE iYIndex = 0; (iYIndex < m_iYDimension); iYIndex++) { vectVariances.set(iYIndex, (vectVariances.get(iYIndex) / (m_iXDimension - 1))); } } } double nonZeroMean() { double dMean = 0.0; mustBeVector(); if (length() > 0) { double dSum = 0.0; SIZE iCount = 0; for (SIZE iIndex = 0; (iIndex < length()); iIndex++) { if (get(iIndex) != 0) { dSum += get(iIndex); iCount++; } } if (iCount == 0) {dMean = 0.0;} else {dMean = dSum / (double)iCount;} } return dMean; } double sd() { double dStandardDeviation = 0.0; double dMean = mean(); mustBeVector(); if (length() > 1) { double dSumOfSquares = 0.0; for (SIZE iIndex = 0; (iIndex < length()); iIndex++) { dSumOfSquares += ((get(iIndex) - dMean) * (get(iIndex) - dMean)); } dStandardDeviation = sqrt(dSumOfSquares / (length() - 1)); } return dStandardDeviation; } double var() { double dVariance = 0.0; double dMean = mean(); mustBeVector(); if (length() > 1) { double dSumOfSquares = 0.0; for (SIZE iIndex = 0; (iIndex < length()); iIndex++) { dSumOfSquares += ((get(iIndex) - dMean) * (get(iIndex) - dMean)); } dVariance = dSumOfSquares / (length() - 1); } return dVariance; } double var(SIZE iLength) { double dVariance = 0.0; double dMean = mean(iLength); mustBeVector(); if (iLength > 1) { double dSumOfSquares = 0.0; for (SIZE iIndex = 0; iIndex < iLength; iIndex++) { dSumOfSquares += ((get(iIndex) - dMean) * (get(iIndex) - dMean)); } dVariance = dSumOfSquares / (iLength - 1); } return dVariance; } double corr() { if (getYDimension() != 2 ) {return std::numeric_limits::quiet_NaN();} SIZE iLength = getXDimension(); double dSum = 0; for (SIZE iElementIndex = 0; (iElementIndex < iLength); iElementIndex++) { dSum += 
get(iElementIndex, 0); } double m1 = (dSum / (double)iLength); dSum = 0; for (SIZE iElementIndex = 0; (iElementIndex < iLength); iElementIndex++) { dSum += get(iElementIndex, 1); } double m2 = (dSum / (double)iLength); double dNumerator = 0; double d1 = 0;; double d2 = 0; for (SIZE iElementIndex = 0; (iElementIndex < iLength); iElementIndex++) { dNumerator += (get(iElementIndex, 0) - m1) * (get(iElementIndex, 1) - m2); d1 += (get(iElementIndex, 0) - m1) * (get(iElementIndex, 0) - m1) ; d2 += (get(iElementIndex, 1) - m2) * (get(iElementIndex, 1) - m2) ; } // double dDenominator = sqrt(d1 * d2); return dNumerator / sqrt(d1) / sqrt(d2); } void test() { mustBeVector(); for (SIZE iIndex = 0; (iIndex < length()); iIndex++) { TYPE t = get(iIndex); SIZE x = 0; } } void quickSort() {mustBeVector(); quickSort(0, (length() - 1), 0);} void quickSort(SIZE iFrom, SIZE iTo, SIZE iCompareCode) { if ((m_ar != NULL) && (length() >= 2)) { TYPE objCenter = 0; SIZE i = iFrom; SIZE j = iTo; objCenter = get((iFrom + iTo) / 2); do { while ((i < iTo) && (objCenter > get(i))) i++; while ((j > iFrom) && (objCenter < get(j))) j--; if (i < j) {swap(i, j);} if (i <= j) { i++; j--; } } while (i <= j); if (iFrom < j) {quickSort(iFrom, j, iCompareCode);} if (i < iTo) {quickSort(i, iTo, iCompareCode);} objCenter = 0; } } void swap(SIZE iIndex1, SIZE iIndex2) { TYPE temp = get(iIndex1); set(iIndex1, get(iIndex2)); set(iIndex2, temp); } double percentile(double dPercentile, bool bSort = true) { if (length() == 0) {return numeric_limits::quiet_NaN();} double d = 0.0; mustBeVector(); if (bSort) {quickSort();} double dIndex = ((length() - 1) * dPercentile); double dMultiplier = dIndex - floor(dIndex); d = get((int)floor(dIndex)) + ((get((int)ceil(dIndex)) - get((int)floor(dIndex))) * dMultiplier); return d; } double percentile(double dPercentile, SIZE iLength, bool bSort = true) { if (iLength == 0) {return numeric_limits::quiet_NaN();} double d = 0.0; mustBeVector(); if (bSort) {quickSort(0, (iLength - 
1), 0);} double dIndex = ((iLength - 1) * dPercentile); double dMultiplier = dIndex - floor(dIndex); d = get((int)floor(dIndex)) + ((get((int)ceil(dIndex)) - get((int)floor(dIndex))) * dMultiplier); return d; } double finiteMedian() { mustBeVector(); vector finiteNumbers; for(SIZE i = 0; i < this->length(); i++) { double d = (double) get(i); if(Util::isFinite(d)) { finiteNumbers.push_back(d); } } if (finiteNumbers.empty()) {return numeric_limits::quiet_NaN();} double med = median_in_place(finiteNumbers.begin(),finiteNumbers.end()); return med; } TYPE median() { if (length() == 0) {return numeric_limits::quiet_NaN();} TYPE d = 0.0; mustBeVector(); std::vector vNonNanVector; typename std::vector::iterator iter; for(int iIndex=0; iIndex ::quiet_NaN(); } if (length % 2 == 0) { iter = vNonNanVector.begin() + ((int)floor((double)length / 2.0) - 1); std::nth_element(vNonNanVector.begin(), iter, vNonNanVector.end()); std::nth_element(iter, iter + 1, vNonNanVector.end()); d = *iter + (*(iter + 1) - *iter)/2.0; } else { iter = vNonNanVector.begin() + (int)floor((double)length / 2.0); std::nth_element(vNonNanVector.begin(), iter, vNonNanVector.end()); d = *iter; } return d; } TYPE median(SIZE iLength) { if (iLength == 0) {return numeric_limits::quiet_NaN();} TYPE d = 0.0; mustBeVector(); std::vector vNonNanVector; typename std::vector::iterator iter; for(int iIndex=0; iIndex ::quiet_NaN(); } if ((iLocalLength % 2) == 0) { iter = vNonNanVector.begin() + ((int)floor((double)iLocalLength / 2.0) - 1); std::nth_element(vNonNanVector.begin(), iter, vNonNanVector.begin() + iLocalLength); std::nth_element(iter, iter + 1, vNonNanVector.begin() + iLocalLength); d = *iter + (*(iter + 1) - *iter)/2.0; } else { iter = vNonNanVector.begin() + (int)floor((double)iLocalLength / 2.0); std::nth_element(vNonNanVector.begin(), iter, vNonNanVector.begin() + iLocalLength); d = *iter; } return d; } static SIZE readVectorSize(const AffxString& strFileName, bool bHeader) { SIZE iCount = 
affx::TsvFile::getLineCountInFile(strFileName); if ( iCount > 0 && bHeader ) { iCount--; } } bool readVector(const AffxString& strFileName, bool bHeader) { try { affx::TsvFile tsv; SIZE iCount = 0; std::string col; tsv.m_optAutoTrim = true; tsv.m_optHasColumnHeader = bHeader; tsv.m_optAutoColumns = !bHeader; if (tsv.open(strFileName) == affx::TSV_OK) { while (tsv.nextLevel(0) == affx::TSV_OK) { tsv.get(0,0,col); set(iCount, AffxByteArray(col).parseDouble()); iCount++; } tsv.clear(); } else {return false;} return true; } catch(...) {return false;} } static SIZE readMatrixYSize(const AffxString& strFileName, bool bColHeader) { SIZE iCount = affx::TsvFile::getLineCountInFile(strFileName); if ( (iCount > 0) && bColHeader ) { iCount--; } return iCount; } static SIZE readMatrixXSize(const AffxString& strFileName, bool bRowHeader) { affx::TsvFile tsv; SIZE iCount = 0; if (tsv.openTable(strFileName) == affx::TSV_OK) { tsv.nextLevel(0); iCount = tsv.getColumnCount(0); if (bRowHeader) {iCount--;} tsv.clear(); } else {return 0;} return iCount; } bool readMatrixXHeader(const AffxString& strFileName, bool bRowHeader, AffxArray& ar) { ar.deleteAll(); affx::TsvFile tsv; std::string cname; SIZE iCount = bRowHeader ? 
1 : 0; tsv.m_optAutoTrim = true; tsv.m_optAbortOnError = false; if (tsv.open(strFileName) == affx::TSV_OK) { for ( ; iCount < tsv.getColumnCount(0); iCount++ ) { if ( tsv.cidx2cname(0, iCount, cname) == affx::TSV_OK) { ar.add(new AffxString(cname)); } } tsv.clear(); } else {return false;} return true; } bool readMatrixYHeader(const AffxString& strFileName, bool bColHeader, AffxArray& ar) { ar.deleteAll(); affx::TsvFile tsv; SIZE iCount = 0; tsv.m_optAutoTrim = true; tsv.m_optAbortOnError = false; tsv.m_optHasColumnHeader = bColHeader; tsv.m_optAutoColumns = !bColHeader; if (tsv.open(strFileName) == affx::TSV_OK) { std::string col; while (tsv.nextLevel(0) == affx::TSV_OK) { tsv.get(0,0,col); ar.add(new AffxString(col)); } tsv.clear(); } else {return false;} return true; } bool readMatrix(const AffxString& strFileName, bool bColHeader, bool bRowHeader, SIZE iColumnCount) { try { affx::TsvFile tsv; tsv.m_optAutoTrim = true; tsv.m_optAbortOnError = true; tsv.m_optHasColumnHeader = bColHeader; tsv.m_optAutoColumns = !bColHeader; SIZE iCount = 0; if (bRowHeader) {iColumnCount++;} if (tsv.open(strFileName) == affx::TSV_OK) { while (tsv.nextLevel(0) == affx::TSV_OK) { SIZE iColumnNumber = bRowHeader ? 1 : 0; SIZE iColIndex = 0; std::string col; for (; (iColumnNumber < iColumnCount); iColumnNumber++, iColIndex++) { tsv.get(0,iColumnNumber, col); set(iColIndex, iCount, AffxByteArray(col).parseDouble()); } iCount++; } tsv.clear(); } else {return false;} return true; } catch(...) 
{return false;} } SIZE binarySearch(TYPE& obj) { SIZE iSearchIndex = -1; SIZE iLow = 0; SIZE iHigh = m_iXDimension - 1; SIZE iMid = 0; SIZE iCompareResult = 0; while (iLow <= iHigh) { iMid = (iLow + iHigh) / 2; iCompareResult = AffxArray::compare(obj, get(iMid)); if (iCompareResult < 0) { iHigh = iMid - 1; } else if (iCompareResult > 0) { iLow = iMid + 1; } else { iSearchIndex = iMid; break; } } return iSearchIndex; } SIZE binarySearch(TYPE obj, unsigned int& uiLowIndex, unsigned int& uiHighIndex) { SIZE iSearchIndex = -1; if (obj < get(0)) {uiLowIndex = uiHighIndex = 0; return -1;} if (obj > get(m_iXDimension - 1)) {uiLowIndex = uiHighIndex = (m_iXDimension + 1); return -1;} SIZE iMid = 0; SIZE iLow = 0; SIZE iHigh = m_iXDimension - 1; SIZE iCompareResult = 0; while (iLow <= iHigh) { iMid = (iLow + iHigh) / 2; iCompareResult = AffxArray::compare(obj, get(iMid)); if (iCompareResult < 0) { iHigh = iMid - 1; } else if (iCompareResult > 0) { iLow = iMid + 1; } else { iSearchIndex = iMid; break; } } // Set boundries where get(uiLowIndex) <= n <= get(uiHighIndex) uiLowIndex = uiHighIndex = iMid; if (get(uiLowIndex) == obj) { while ((uiLowIndex > 0) && (get(uiLowIndex - 1) == obj)) {uiLowIndex--;} while ((uiHighIndex < (m_iXDimension - 1)) && (obj == get(uiHighIndex + 1))) {uiHighIndex++;} } else { while ((uiLowIndex > 0) && (get(uiLowIndex) > obj)) {uiLowIndex--;} while ((uiHighIndex < (m_iXDimension - 1)) && (obj > get(uiHighIndex))) {uiHighIndex++;} } return iSearchIndex; } double regress() { double dSlope = 0; double dIntercept = 0; double dXCoefficient = 0; double dSumX = 0.0; double dSumXsq = 0.0; double dSumY = 0.0; double dSumYsq = 0.0; double dSumXY = 0.0; double dMaxX = -99999; double dMaxY = -99999; double x = 0.0; double y = 0.0; double xSq = 0.0; double ySq = 0.0; for(SIZE i = 0; (i < getXDimension()); ++i) { x = get(i, 0); y = get(i, 1); xSq = x * x; ySq = y * y; dSumX += x; dSumXsq += xSq; dSumY += y; dSumYsq += ySq; dSumXY += (x * y); dMaxX = Max(x, 
dMaxX); dMaxY = Max(y, dMaxY); } double dSlopeNumerator = 0.0; double dSlopeDenomenator = 0.0; dSlopeNumerator = (getXDimension() * dSumXY) - (dSumX * dSumY); dSlopeDenomenator = (getXDimension() * dSumXsq) - (dSumX * dSumX); if ((dSlopeDenomenator <= 0) || (getXDimension() <= 2)) { return numeric_limits::quiet_NaN(); } dSlope = dSlopeNumerator / dSlopeDenomenator; dIntercept = (dSumY - (dSlope * dSumX)) / getXDimension(); double dSxSq = (dSumXsq - (dSumXsq / getXDimension())) / (getXDimension() - 1); double dSySq = (dSumYsq - (dSumYsq / getXDimension())) / (getXDimension() - 1); dXCoefficient = dSlope * (sqrt(dSxSq) / sqrt(dSySq)); return dSlope; } double normDifference() { SIZE iLength = getXDimension(); double dSum = 0; for (SIZE iElementIndex = 0; (iElementIndex < iLength); iElementIndex++) { double dDiff = get(iElementIndex, 0) - get(iElementIndex, 1); dSum += dDiff * dDiff; } return sqrt(dSum); } }; template double AffxMultiDimensionalArray::infinity; #endif affxparser/src/fusion/util/AffxSTL.h0000644000175200017520000001050114516003651020422 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxSTL_h_ #define _AffxSTL_h_ /** * @file AffxSTL.h * * @brief This header contains definitions of STL-related tools */ // // #undef max #include #include using std::size_t; using std::ptrdiff_t; using std::numeric_limits; using std::unary_function; namespace affxstl { /** * @brief An allocator which releases memory ASAP (calls ::operator delete upon exiting scope). */ template class quickReleaseAlloc; template <> class quickReleaseAlloc { public: typedef void* pointer; typedef const void* const_pointer; typedef void value_type; template struct rebind { typedef quickReleaseAlloc other; }; }; template class quickReleaseAlloc { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef T value_type; template struct rebind { typedef quickReleaseAlloc other; }; pointer address(reference value) const { return &value; } const_pointer address(const_reference value) const { return &value; } quickReleaseAlloc() throw() {} quickReleaseAlloc(const quickReleaseAlloc&) throw() {} template quickReleaseAlloc(const quickReleaseAlloc&) throw() {} ~quickReleaseAlloc() throw() {} size_type max_size() const throw() { return numeric_limits::max()/sizeof(T); } pointer allocate(size_type num, quickReleaseAlloc::const_pointer = 0) { return (pointer)(::operator new(num*sizeof(T))); } void construct(pointer p, const T& value) { new ((void *)p) T(value); } void destroy(pointer p) { p->~T(); } void deallocate(pointer p, size_type num) { ::operator delete((void *)p); } }; template bool operator==(const quickReleaseAlloc&, const quickReleaseAlloc&) throw() { return true; } template 
bool operator!=(const quickReleaseAlloc&, const quickReleaseAlloc&) throw() { return false; } /** * @brief An implementation of the nonexistent copy_if (after Meyers' STL book, Item 36). */ template OutputItr copy_if(InputItr fromBegin, InputItr fromEnd, OutputItr toBegin, Predicate pred) { while (fromBegin != fromEnd) { if (pred(*fromBegin)) { *toBegin = *fromBegin; ++toBegin; } ++fromBegin; } return toBegin; } /** * @brief Adopted from the old SGI template library. Makes possible things like: * * // copy all keys of map M to vector V: * transform(M.begin(), M.end(), back_inserter(V), select1st::value_type>()); * * // copy all values to vector W: * transform(M.begin(), M.end(), back_inserter(W), select2nd::value_type>()); */ template struct select1st : public unary_function { const typename PT::first_type& operator()(const PT& v) const { return v.first; } }; template struct select2nd : public unary_function { const typename PT::second_type& operator()(const PT& v) const { return v.second; } }; } // namespace affxstl #endif // _AffxSTL_h_ affxparser/src/fusion/util/AffxSplitArray.h0000644000175200017520000000707714516003651022070 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2009 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxSplitArray_h_ #define _AffxSplitArray_h_ /** * @file AffxSplitArray.h * * @brief This header contains the AffxSplitArray class definition. */ #include "util/Convert.h" #include "util/Verbose.h" // #include // template class AffxSplitArray { private: /*! Vector of arrays of data objects. */ std::vector vData; /*! The size of each array of data in the data vector. */ std::vector vDataSizes; /*! The total number of data objects stored. */ int vDataSize; /*! Allocate memory for the data. * @param n The number of data objects. * @param narrays The number of vector elements to create for the data storage. */ void allocate(int n, int narrays) { // NB: this code assumes n >= narrays vDataSize = n; vDataSizes.resize(narrays); vData.resize(narrays); for (int i=0; i= vDataSizes.size()) idx = vDataSizes.size() - 1; i -= (entriesPerSet * idx); return &vData[idx][i]; } TYPE& operator[](int i) { return *getAt(i); } const TYPE& operator[](int i) const { return *getAt(i); } }; #endif // _AffxArray_h_ affxparser/src/fusion/util/AffxStatistics.cpp0000644000175200017520000001531114516003651022451 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file AffxStatistics.cpp * * @brief This file contains the AffxStatistics class members. */ #include "util/AffxStatistics.h" // #include "util/AffxConv.h" // #include #include // //#include "portability/affy-base-types.h" using namespace std; #define NaN numeric_limits::quiet_NaN() AffxStatistics::AffxStatistics() { } AffxStatistics::~AffxStatistics() { } double AffxStatistics::factorial(int i) { double lResult = 1; for (int iIndex = 2; (iIndex <= i); iIndex++) { lResult *= iIndex; } return lResult; } double AffxStatistics::binocdf(int x, int n, double p) { double dSum = 0; for (int iIndex = 0; (iIndex <= x); iIndex++) { double a = (factorial(n) / (factorial(iIndex) * (factorial(n - iIndex)))); dSum += (a * pow(p, iIndex) * pow((1 - p), (n - iIndex))); } return dSum; } int AffxStatistics::binoinv(double y, int n, double p) { int iResult = 0; for (int i = 0; (i < n); i++) { if (binocdf(i, n, p) >= y) { iResult = i; break; } } return iResult; } // http://home.online.no/~pjacklam/notes/invnorm/ // http://home.online.no/~pjacklam/notes/invnorm/impl/lea/lea.c /* * The standard normal CDF, for one random variable. * * Author: W. J. Cody * URL: http://www.netlib.org/specfun/erf * * This is the erfc() routine only, adapted by the * transform stdnormal_cdf(u)=(erfc(-u/sqrt(2))/2; */ // Q: What about copyright? // A: You can use the algorithm, including any of the computer implementations listed in the section Computer implementations, for whatever purpuse you want, but please show common courtesy and give credit where credit is due. 
double AffxStatistics::normcdf(double u) { const double a[5] = { 1.161110663653770e-002,3.951404679838207e-001,2.846603853776254e+001, 1.887426188426510e+002,3.209377589138469e+003 }; const double b[5] = { 1.767766952966369e-001,8.344316438579620e+000,1.725514762600375e+002, 1.813893686502485e+003,8.044716608901563e+003 }; const double c[9] = { 2.15311535474403846e-8,5.64188496988670089e-1,8.88314979438837594e00, 6.61191906371416295e01,2.98635138197400131e02,8.81952221241769090e02, 1.71204761263407058e03,2.05107837782607147e03,1.23033935479799725E03 }; const double d[9] = { 1.00000000000000000e00,1.57449261107098347e01,1.17693950891312499e02, 5.37181101862009858e02,1.62138957456669019e03,3.29079923573345963e03, 4.36261909014324716e03,3.43936767414372164e03,1.23033935480374942e03 }; const double p[6] = { 1.63153871373020978e-2,3.05326634961232344e-1,3.60344899949804439e-1, 1.25781726111229246e-1,1.60837851487422766e-2,6.58749161529837803e-4 }; const double q[6] = { 1.00000000000000000e00,2.56852019228982242e00,1.87295284992346047e00, 5.27905102951428412e-1,6.05183413124413191e-2,2.33520497626869185e-3 }; register double y, z; if (u != u) return NaN; if ((u == numeric_limits::infinity()) || (u == -numeric_limits::infinity())) return (u < 0 ? 0.0 : 1.0); y = fabs(u); if (y <= 0.46875*sqrt(2.0)) { /* evaluate erf() for |u| <= sqrt(2)*0.46875 */ z = y*y; y = u*((((a[0]*z+a[1])*z+a[2])*z+a[3])*z+a[4]) /((((b[0]*z+b[1])*z+b[2])*z+b[3])*z+b[4]); return 0.5+y; } z = exp(-y*y/2)/2; if (y <= 4.0) { /* evaluate erfc() for sqrt(2)*0.46875 <= |u| <= sqrt(2)*4.0 */ y = y/sqrt(2.0); y = ((((((((c[0]*y+c[1])*y+c[2])*y+c[3])*y+c[4])*y+c[5])*y+c[6])*y+c[7])*y+c[8]) /((((((((d[0]*y+d[1])*y+d[2])*y+d[3])*y+d[4])*y+d[5])*y+d[6])*y+d[7])*y+d[8]); y = z*y; } else { /* evaluate erfc() for |u| > sqrt(2)*4.0 */ z = z*sqrt(2.0)/y; y = 2/(y*y); y = y*(((((p[0]*y+p[1])*y+p[2])*y+p[3])*y+p[4])*y+p[5]) /(((((q[0]*y+q[1])*y+q[2])*y+q[3])*y+q[4])*y+q[5]); y = z*(sqrt(PI)-y); } return (u < 0.0 ? 
y : 1-y); }; // http://home.online.no/~pjacklam/notes/invnorm/ // http://home.online.no/~pjacklam/notes/invnorm/impl/lea/lea.c /* * The inverse standard normal distribution. * * Author: Peter J. Acklam * URL: http://home.online.no/~pjacklam * * This function is based on the MATLAB code from the address above, * translated to C, and adapted for our purposes. */ // Q: What about copyright? // A: You can use the algorithm, including any of the computer implementations listed in the section Computer implementations, for whatever purpuse you want, but please show common courtesy and give credit where credit is due. double AffxStatistics::norminv(double p) { const double a[6] = { -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 }; const double b[5] = { -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 }; const double c[6] = { -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 }; const double d[4] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 }; register double q, t, u; if ((p != p) || p > 1.0 || p < 0.0) return NaN; if (p == 0.0) return NaN; if (p == 1.0) return NaN; q = min(p,1-p); if (q > 0.02425) { /* Rational approximation for central region. */ u = q-0.5; t = u*u; u = u*(((((a[0]*t+a[1])*t+a[2])*t+a[3])*t+a[4])*t+a[5]) /(((((b[0]*t+b[1])*t+b[2])*t+b[3])*t+b[4])*t+1); } else { /* Rational approximation for tail region. */ t = sqrt(-2*log(q)); u = (((((c[0]*t+c[1])*t+c[2])*t+c[3])*t+c[4])*t+c[5]) /((((d[0]*t+d[1])*t+d[2])*t+d[3])*t+1); } /* The relative error of the approximation has absolute value less than 1.15e-9. One iteration of Halley's rational method (third order) gives full machine precision... 
*/ t = normcdf(u)-q; /* error */ t = t*sqrt(2*PI)*exp(u*u/2); /* f(u)/df(u) */ u = u-t/(1+u*t/2); /* Halley's method */ return (p > 0.5 ? -u : u); }; affxparser/src/fusion/util/AffxStatistics.h0000644000175200017520000000266314516003651022124 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxStatistics_h_ #define _AffxStatistics_h_ /** * @file AffxStatistics.h * * @brief This header contains the AffxStatistics class definition. */ #define PI 3.1415926536 #include "util/AffxMultiDimensionalArray.h" // /** * @brief A class for statistical functions. * */ class AffxStatistics { protected: public: AffxStatistics(); virtual ~AffxStatistics(); static double factorial(int i); static double binocdf(int x, int n, double p); static int binoinv(double y, int n, double p); static double normcdf(double u); static double norminv(double p); }; #endif affxparser/src/fusion/util/AffxString.cpp0000644000175200017520000005616614516003651021602 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file AffxString.cpp * * @brief This file contains the AffxString class members. */ // #include "util/AffxString.h" // #include "util/AffxArray.h" #include "util/AffxConv.h" // #include #include #include // //AffxStringHashtable AffxString::m_ht(29); void AffxString::stripPrecedingCharacters (char cChar) { for (unsigned int iIndex = 0; (iIndex < length()); iIndex++) { if (charAt(iIndex) == cChar) { *this = substr(1); iIndex--; } else {break;} } } void AffxString::stripTrailingCharacters (char cChar) { for (int iIndex = (int)length() - 1; (iIndex >= 0); iIndex--) { if (charAt(iIndex) == cChar) { *this = substr(0, length() - 1); } else {break;} } } void AffxString::padBlanks (short nMax) { int nLen = (int)length(); for (; nLen < nMax; nLen++) *this += ' '; } bool AffxString::isBlank () { for (unsigned int i(0); i < length(); i++) if ((*this)[i] != ' ') return false; return true; } int AffxString::printf(const char *pszFormat, ...) 
{ int iResult = 0; char szBuffer[MAX_SPRINTF_SIZE]; va_list args; va_start(args, pszFormat); #ifdef WIN32 iResult = vsprintf_s(szBuffer, MAX_SPRINTF_SIZE, pszFormat, args); #else iResult = vsprintf(szBuffer, pszFormat, args); #endif *this += szBuffer; va_end(args); return iResult; } int AffxString::sprintf(const char *pszFormat, ...) { int iResult = 0; char szBuffer[MAX_SPRINTF_SIZE]; va_list args; va_start(args, pszFormat); #ifdef WIN32 iResult = vsprintf_s(szBuffer, MAX_SPRINTF_SIZE, pszFormat, args); #else iResult = vsprintf(szBuffer, pszFormat, args); #endif *this = szBuffer; va_end(args); return iResult; } void AffxString::stripAndPad(int iLength) { stripPrecedingBlanks(); stripTrailingBlanks(); padBlanks((short)iLength); } void AffxString::strip() { stripPrecedingBlanks(); stripTrailingBlanks(); } bool AffxString::startsWith(const AffxString& strCompare) const { unsigned int iCompareLength = (unsigned int) strCompare.length(); if (length() >= iCompareLength) { return (strncmp(this->c_str(), strCompare.c_str(), iCompareLength) == 0); } return false; } AffxString AffxString::reverseComplement() { AffxString str = *this; AffxString strComplement = "tvghefcdijmlknopqysaabwxrz"; char ch = 0; int iCount = (int)length(); int iIndex = 0; str.toLowerCase(); if (((iCount / 2) * 2) != iCount) { iIndex = (iCount / 2); if (str.getAt(iIndex) < 'a') {str.setAt(iIndex, 'n');} try {str.setAt(iIndex, strComplement.getAt(str.getAt(iIndex) - 'a'));} catch(...) {str.setAt(iIndex, 'n');} } for (int i = 0; (i < (iCount / 2)); i++) { iIndex = (iCount - i - 1); ch = str.getAt(i); if (ch < 'a') {ch = 'n';} if (str.getAt(iIndex) < 'a') {str.setAt(iIndex, 'n');} try {str.setAt(i, strComplement.getAt(str.getAt(iIndex) - 'a'));} catch(...) {str.setAt(i, 'n');} try {str.setAt(iIndex, strComplement.getAt(ch - 'a'));} catch(...) 
{str.setAt(iIndex, 'n');} } return str; } AffxString AffxString::complement() { AffxString str = *this; AffxString strComplement = "tvghefcdijmlknopqywaabsxrz"; int iCount = (int)length(); str.toLowerCase(); for (int i = 0; (i < iCount); i++) { try {str.setAt(i, strComplement.getAt(str.getAt(i) - 'a'));} catch(...) {str.setAt(i, 'n');} } return str; } AffxString AffxString::reverse() { AffxString str = *this; char ch = 0; int iCount = (int)length(); int iIndex = 0; for (int i = 0; (i < (iCount / 2)); i++) { iIndex = (iCount - i - 1); ch = getAt(i); str.setAt(i, str.getAt(iIndex)); str.setAt(iIndex, ch); } return str; } void AffxString::convertAmbiguities(char chAmbiguity) { int iSeqCount = (int)length(); for (int iSeqIndex = 0; (iSeqIndex < iSeqCount); iSeqIndex++) { char ch = getAt(iSeqIndex); if ((ch != 'a') && (ch != 'c') && (ch != 'g') && (ch != 't')) { setAt(iSeqIndex, chAmbiguity); } } } // Take the int parameters and convert it to a charcter string with // commas. (Ex. -99999 converts to "-99,999") AffxString AffxString::intToString(int i, bool bCommas) { return ::getInt(i, bCommas); } // Take the double parameter and convert it to a charcter string with // commas. (Ex. -99999.99 converts to "-99,999.99") AffxString AffxString::doubleToString(double d, int iDecimalPlaces, bool bCommas) { return ::getDouble(d, iDecimalPlaces, bCommas); } // Looks for the first occurance of = and returns whatever is before it. Returns the input if = not found. AffxString AffxString::getKey() { size_t iIndex = this->find_first_of("="); if( iIndex !=npos) { return this->substr(0, iIndex); } else { return *this; } } AffxString AffxString::removeCruft() { if(this->startsWith("#%affymetrix-")) { return this->substr(13, this->length()); } else { bool value = this->startsWith("#%"); if(value) { return this->substr(2, this->length()); } else { return *this; } } } // Looks for the first occurance of = and returns whatever is after it. Returns the empty string if = not found. 
AffxString AffxString::getValue() { size_t iIndex = this->find_first_of("="); if( iIndex !=npos) { return this->substr(iIndex+1, this->length()); } else { return *this; } } // This function is used by AffxArray for sorting and searching. int AffxString::compareTo(const AffxString& obj, int iCompareCode) const { int iCompareResult = 0; switch(iCompareCode) { case 0: // Identifier if (*this > obj) iCompareResult = 1; else if (*this < obj) iCompareResult = -1; else iCompareResult = 0; break; } return iCompareResult; } AffxString& AffxString::replace(char cOld, char cNew) { int iFindIndex = 0; while ((iFindIndex = (int)find(cOld)) != -1) setAt(iFindIndex, cNew); return *this; } bool AffxString::endsWith(const AffxString& str) const { if (getLength() < str.getLength()) {return false;} return (strcmp(c_str() + (getLength() - str.getLength()), str.c_str()) == 0); /* bool bEndsWith = false; int iCompareLength = strCompare.length(); if (length() >= iCompareLength) { int iIndex2 = 0; bEndsWith = true; for (int iIndex = (length() - iCompareLength); (iIndex < length()); iIndex++) { if (GetAt(iIndex) != strCompare.GetAt(iIndex2)) { bEndsWith = false; break; } iIndex2++; } } return bEndsWith; */ } AffxString AffxString::removeSurroundingQuotes() { if ((startsWith("\"")) && (endsWith("\""))) { *this = substring(1, (int)length() - 1); } return *this; } int AffxString::nextIndexOf(const AffxString& str, int iIndex) const { for (unsigned int i = (iIndex + 1); (i <= (getLength() - str.getLength())); i++) { if (substring(i, (i + (int)str.getLength())) == str) { return i; } } return -1; } bool AffxString::isBaseAt(int iIndex) { switch(charAt(iIndex)) { case 'a': return true; case 'A': return true; case 'c': return true; case 'C': return true; case 'g': return true; case 'G': return true; case 't': return true; case 'T': return true; default: return false; } } bool AffxString::isSnpAt(int iIndex) { switch(charAt(iIndex)) { case 'r': return true; case 'R': return true; case 'y': return 
true; case 'Y': return true; case 'm': return true; case 'M': return true; case 'k': return true; case 'K': return true; case 'w': return true; case 'W': return true; case 's': return true; case 'S': return true; case 'b': return true; case 'B': return true; case 'd': return true; case 'D': return true; case 'h': return true; case 'H': return true; case 'v': return true; case 'V': return true; case 'n': return true; case 'N': return true; default: return false; } } AffxString AffxString::getIUPACCode() { if (this->indexOf("-") != -1) {return "-";} if (this->CompareNoCase("R") == 0) {return "R";} if ((this->CompareNoCase("A/G") == 0) || (this->CompareNoCase("G/A") == 0)) {return "R";} if (this->CompareNoCase("Y") == 0) {return "Y";} if ((this->CompareNoCase("C/T") == 0) || (this->CompareNoCase("T/C") == 0)) {return "Y";} if (this->CompareNoCase("M") == 0) {return "M";} if ((this->CompareNoCase("A/C") == 0) || (this->CompareNoCase("C/A") == 0)) {return "M";} if (this->CompareNoCase("K") == 0) {return "K";} if ((this->CompareNoCase("G/T") == 0) || (this->CompareNoCase("T/G") == 0)) {return "K";} if (this->CompareNoCase("W") == 0) {return "W";} if ((this->CompareNoCase("A/T") == 0) || (this->CompareNoCase("T/A") == 0)) {return "W";} if (this->CompareNoCase("S") == 0) {return "S";} if ((this->CompareNoCase("C/G") == 0) || (this->CompareNoCase("G/C") == 0)) {return "S";} if (this->CompareNoCase("B") == 0) {return "B";} if ((this->CompareNoCase("C/G/T") == 0) || (this->CompareNoCase("T/G/C") == 0)) {return "B";} if (this->CompareNoCase("D") == 0) {return "D";} if ((this->CompareNoCase("A/G/T") == 0) || (this->CompareNoCase("T/G/A") == 0)) {return "D";} if (this->CompareNoCase("H") == 0) {return "H";} if ((this->CompareNoCase("A/C/T") == 0) || (this->CompareNoCase("T/C/A") == 0)) {return "H";} if (this->CompareNoCase("V") == 0) {return "V";} if ((this->CompareNoCase("A/C/G") == 0) || (this->CompareNoCase("G/C/A") == 0)) {return "V";} if (this->CompareNoCase("N") == 0) 
{return "N";} if ((this->CompareNoCase("A/C/G/T") == 0) || (this->CompareNoCase("T/G/C/A") == 0)) {return "N";} return "?"; } AffxString AffxString::getIUPACCodeRC() { if (*this == "") {return "?";} switch(this->charAt(0)) { case 'r': return "Y"; case 'R': return "Y"; case 'y': return "R"; case 'Y': return "R"; case 'm': return "K"; case 'M': return "K"; case 'k': return "M"; case 'K': return "M"; case 'w': return "W"; case 'W': return "W"; case 's': return "S"; case 'S': return "S"; case 'b': return "V"; case 'B': return "V"; case 'd': return "H"; case 'D': return "H"; case 'h': return "D"; case 'H': return "D"; case 'v': return "B"; case 'V': return "B"; case 'n': return "N"; case 'N': return "N"; case '-': return "-"; case '?': return "?"; default: return "?"; } } bool AffxString::isBiAllelic() { if (this->CompareNoCase("R") == 0) {return true;} if (this->CompareNoCase("A/G") == 0) {return true;} if (this->CompareNoCase("Y") == 0) {return true;} if (this->CompareNoCase("C/T") == 0) {return true;} if (this->CompareNoCase("M") == 0) {return true;} if (this->CompareNoCase("A/C") == 0) {return true;} if (this->CompareNoCase("K") == 0) {return true;} if (this->CompareNoCase("G/T") == 0) {return true;} if (this->CompareNoCase("W") == 0) {return true;} if (this->CompareNoCase("A/T") == 0) {return true;} if (this->CompareNoCase("S") == 0) {return true;} if (this->CompareNoCase("C/G") == 0) {return true;} return false; } bool AffxString::isTriAllelic() { if (this->CompareNoCase("B") == 0) {return true;} if (this->CompareNoCase("C/G/T") == 0) {return true;} if (this->CompareNoCase("D") == 0) {return true;} if (this->CompareNoCase("A/G/T") == 0) {return true;} if (this->CompareNoCase("H") == 0) {return true;} if (this->CompareNoCase("A/C/T") == 0) {return true;} if (this->CompareNoCase("V") == 0) {return true;} if (this->CompareNoCase("A/C/G") == 0) {return true;} return false; } AffxString AffxString::getSnpStringAt(int iIndex) { switch(charAt(iIndex)) { case 'a': return 
"A"; case 'A': return "A"; case 'c': return "C"; case 'C': return "C"; case 'g': return "G"; case 'G': return "G"; case 't': return "T"; case 'T': return "T"; case 'r': return "AG"; case 'R': return "AG"; case 'y': return "CT"; case 'Y': return "CT"; case 'm': return "AC"; case 'M': return "AC"; case 'k': return "GT"; case 'K': return "GT"; case 'w': return "AT"; case 'W': return "AT"; case 's': return "CG"; case 'S': return "CG"; case 'b': return "CGT"; case 'B': return "CGT"; case 'd': return "AGT"; case 'D': return "AGT"; case 'h': return "ACT"; case 'H': return "ACT"; case 'v': return "ACG"; case 'V': return "ACG"; case 'n': return "ACGT"; case 'N': return "ACGT"; default: return ""; } } /** calculates the melting temperature of a DNA sequence according to an algorithmn * provided by Tom. Does not handle ambigous DNA symbols (e.g. SNP b = c/g/t) */ double AffxString::calculateMeltingTemp() { double dTemp = 0; if (length() < 8) { int iACount = 0; int iCCount = 0; int iGCount = 0; int iTCount = 0; for (unsigned int iIndex = 0; (iIndex < length()); iIndex++) { switch(charAt(iIndex)) { case 'a': iACount++; break; case 'A': iACount++; break; case 'c': iCCount++; break; case 'C': iCCount++; break; case 'g': iGCount++; break; case 'G': iGCount++; break; case 't': iTCount++; break; case 'T': iTCount++; break; default: return 0; } } dTemp = (2 * (iACount + iTCount) + 4 * (iCCount + iGCount)); } else { double dRlnK = 0; double dDeltah = 0; double dDeltas = 0; double dSaltadj = 0; for (unsigned int iIndex = 0; (iIndex < (length() - 1)); iIndex++) { if ((charAt(iIndex + 1) == 'A') || (charAt(iIndex + 1) == 'a')) { if ((charAt(iIndex) == 'A') || (charAt(iIndex) == 'a')) { dDeltah += 8.0; dDeltas += 21.9; } else if ((charAt(iIndex) == 'C') || (charAt(iIndex) == 'c')) { dDeltah += 8.2; dDeltas += 21.0; } else if ((charAt(iIndex) == 'G') || (charAt(iIndex) == 'g')) { dDeltah += 8.8; dDeltas += 23.5; } else if ((charAt(iIndex) == 'T') || (charAt(iIndex) == 't')) { dDeltah += 
5.6; dDeltas += 15.2; } } else if ((charAt(iIndex + 1) == 'C') || (charAt(iIndex + 1) == 'c')) { if ((charAt(iIndex) == 'A') || (charAt(iIndex) == 'a')) { dDeltah += 9.4; dDeltas += 25.5; } else if ((charAt(iIndex) == 'C') || (charAt(iIndex) == 'c')) { dDeltah += 10.9; dDeltas += 28.4; } else if ((charAt(iIndex) == 'G') || (charAt(iIndex) == 'g')) { dDeltah += 10.5; dDeltas += 26.4; } else if ((charAt(iIndex) == 'T') || (charAt(iIndex) == 't')) { dDeltah += 8.8; dDeltas += 23.5; } } else if ((charAt(iIndex + 1) == 'G') || (charAt(iIndex + 1) == 'g')) { if ((charAt(iIndex) == 'A') || (charAt(iIndex) == 'a')) { dDeltah += 6.6; dDeltas += 16.4; } else if ((charAt(iIndex) == 'C') || (charAt(iIndex) == 'c')) { dDeltah += 11.8; dDeltas += 29.0; } else if ((charAt(iIndex) == 'G') || (charAt(iIndex) == 'g')) { dDeltah += 10.9; dDeltas += 28.4; } else if ((charAt(iIndex) == 'T') || (charAt(iIndex) == 't')) { dDeltah += 8.2; dDeltas += 21.0; } } else if ((charAt(iIndex + 1) == 'T') || (charAt(iIndex + 1) == 't')) { if ((charAt(iIndex) == 'A') || (charAt(iIndex) == 'a')) { dDeltah += 5.6; dDeltas += 15.2; } else if ((charAt(iIndex) == 'C') || (charAt(iIndex) == 'c')) { dDeltah += 6.6; dDeltas += 16.4; } else if ((charAt(iIndex) == 'G') || (charAt(iIndex) == 'g')) { dDeltah += 9.4; dDeltas += 25.5; } else if ((charAt(iIndex) == 'T') || (charAt(iIndex) == 't')) { dDeltah += 8.0; dDeltas += 21.9; } else return 0; } else return 0; } dRlnK = 1.987 * log(1.0 / (100 * 1.0e-9)); dSaltadj = 7.21 * log(100.0 / 1000.0); dTemp = (1000.0 * (dDeltah - 3.4)) / (dDeltas + dRlnK) - 272.9 + dSaltadj; } return dTemp; } /** calculates the melting temperature of a DNA sequence. 
If the Sequence contains SNPs (with IUPac codes like * b = c/g/t it calcuates the temp for all possible alleles and returns the MINIMUM temp */ double AffxString::calculateMeltingTempWithSNPs() { if (length() == 0) {return 0;} AffxArray arSequences; //List of sequences with ambiguities removed int iLastStart = 0; int iSnpCount = 0; for (unsigned int iIndex = 0; (iIndex < length()); iIndex++) { if (isSnpAt(iIndex)) { iSnpCount++; if (iSnpCount > 5) {return 0;} int iStart = iIndex; AffxString strAlleles = getSnpStringAt(iStart); AffxString strSubstring = substring(iLastStart, iStart); iLastStart = iStart + 1; if (arSequences.getCount() == 0) { for (unsigned int iAlleleIndex = 0; (iAlleleIndex < strAlleles.length()); iAlleleIndex++) { arSequences.add(new AffxString("" + strSubstring + strAlleles.charAt(iAlleleIndex))); } } else { // create a new list which multiplies the elements by the number of alleles AffxArray arNewSequences; for (int iSequenceIndex = 0; (iSequenceIndex < arSequences.getCount()); iSequenceIndex++) { // seed the new list with copies of the string AffxString strBase = *arSequences.getAt(iSequenceIndex); for (unsigned int iAlleleIndex = 0; (iAlleleIndex < strAlleles.length()); iAlleleIndex++) { arNewSequences.add(new AffxString("" + strBase + strSubstring + strAlleles.charAt(iAlleleIndex))); } } arSequences.deleteAll(); for (int iSequenceIndex = 0; (iSequenceIndex < arNewSequences.getCount()); iSequenceIndex++) { arSequences.add(arNewSequences.getAt(iSequenceIndex)); } arNewSequences.nullAll(); } } } // add the last sequence section to the list AffxString strSubstring = substring(iLastStart, (int)length()); if (arSequences.getCount() == 0) { arSequences.add(new AffxString(strSubstring)); } else { AffxArray arNewSequences; for (int iSequenceIndex = 0; (iSequenceIndex < arSequences.getCount()); iSequenceIndex++) { // seed the new list with copies of the string AffxString strBase = *arSequences.getAt(iSequenceIndex); arNewSequences.add(new 
AffxString(strBase + strSubstring)); } arSequences.deleteAll(); for (int iSequenceIndex = 0; (iSequenceIndex < arNewSequences.getCount()); iSequenceIndex++) { arSequences.add(arNewSequences.getAt(iSequenceIndex)); } arNewSequences.nullAll(); } double dMinTemp = 1.7976931348623157e+308; // Double.MAX_VALUE for (int iSequenceIndex = 0; (iSequenceIndex < arSequences.getCount()); iSequenceIndex++) { AffxString strSequence = *arSequences.getAt(iSequenceIndex); double dTemp = strSequence.calculateMeltingTemp(); if (dTemp < dMinTemp) { dMinTemp = dTemp; } } arSequences.deleteAll(); return dMinTemp; } double AffxString::getGCContent() { double dGCContent = 0; int iGCCount = 0; int iBaseCount = 0; for (unsigned int iBaseIndex = 0; (iBaseIndex < length()); iBaseIndex++) { if ((charAt(iBaseIndex) == 'C') || (charAt(iBaseIndex) == 'c') || (charAt(iBaseIndex) == 'G') || (charAt(iBaseIndex) == 'g')) { iGCCount++; } iBaseCount++; } if (iBaseCount == 0) {dGCContent = 0;} else {dGCContent = ((double)iGCCount / (double)iBaseCount) * 100;} return dGCContent; } affxparser/src/fusion/util/AffxString.h0000644000175200017520000002306214516003651021234 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxString_h_ #define _AffxString_h_ /** * @file AffxString.h * * @brief This header contains the AffxString class definition. */ #include "util/AffxConv.h" #include "util/Err.h" // #include #include #include #include #include #include // #define MAX_SPRINTF_SIZE 8192 #define _TAB_CHAR 0x09 /** * @brief A class derived from std::string. * */ class AffxString : public std::string { /* private: static AffxStringHashtable m_ht; public: static AffxStringHashtable* getHashtable() {return &m_ht;} static int getHashtableCount() {return (int)m_ht.GetCount();} */ public: AffxString (); AffxString (const std::string& stringSrc); AffxString (const AffxString& stringSrc); AffxString (char ch, int nRepeat = 1); //AffxString (unsigned char ch, int nRepeat = 1); AffxString (const char* psz); AffxString (const char* pch, int nLength); // Character Query Member Functions // bool isTab (int nIndex) const; bool isWhiteSpace (int nIndex) const; bool isAlphabetic (int nIndex) const; bool isAlphanumeric (int nIndex) const; bool isBlank (int nIndex) const; bool isPunctuation (int nIndex) const; bool isUppercase (int nIndex) const; bool isLowercase (int nIndex) const; bool isNumeric (int nIndex) const; bool isHexNumeric (int nIndex) const; AffxString getKey(); AffxString getValue(); AffxString removeCruft(); // Misc Functions // void toUpper (int nIndex); void toLower (int nIndex); // Tab Expansion // void expandTabs (int nTabSize = 8); unsigned int getLength() const {return (unsigned int)length();} AffxString mid(int iStartIndex, int iLength) const {return substr(iStartIndex, iStartIndex + iLength);} // Stripping Functions // void stripTrailingBlanks (); void stripPrecedingBlanks (); void 
stripTrailingCharacters (char cChar); void stripPrecedingCharacters (char cChar); void padBlanks (short nMax); bool isBlank (); bool isBlankOrEmpty (); bool isNumber(); int printf(const char *pszFormat, ...); int sprintf(const char *pszFormat, ...); void stripAndPad(int iLength); void strip(void); AffxString& trim() {strip(); return *this;} void zeroFill(int i, int iLength); static AffxString intToString(int i, bool bCommas); static AffxString doubleToString(double d, int iDecimalPlaces, bool bCommas); void setAt(unsigned int iIndex, char c) {if (iIndex < length()) {(*this)[iIndex] = c;} else {Err::errAbort("Out of Bounds exception.");}} char getAt(unsigned int iIndex) const {if (iIndex < length()) {return (*this)[iIndex];} else {Err::errAbort("Out of Bounds exception."); return 0;}} char charAt(unsigned int iIndex) const {if (iIndex < length()) {return (*this)[iIndex];} else {Err::errAbort("Out of Bounds exception."); return 0;}} AffxString& toLowerCase() { for (unsigned int iIndex = 0; (iIndex < length()); iIndex++) { toLower(iIndex); } return *this; } AffxString& toUpperCase() { for (unsigned int iIndex = 0; (iIndex < length()); iIndex++) { toUpper(iIndex); } return *this; } AffxString substring(unsigned int iIndex) const {if (iIndex < length()) {return substr(iIndex);} else {return "";}} AffxString& replace(char cOld, char cNew); AffxString substring(unsigned int iIndex, unsigned int iEndIndex) const {if (iEndIndex > length()) {iEndIndex = (unsigned int)length();} if (iIndex < length()) {return substr(iIndex, (iEndIndex - iIndex));} else {return "";}} bool equals(const AffxString& str) const {return operator==(((const AffxString&)*this), (const AffxString&)str);} // bool operator!=(const AffxString& str) {return !equals(str);} AffxString reverseComplement(); AffxString reverse(); AffxString complement(); void convertAmbiguities(char chAmbiguity); bool startsWith(const AffxString& strCompare) const; int compareTo(const AffxString& obj, int iCompareCode) const; 
bool endsWith(const AffxString& str) const; int indexOf(const AffxString& strSubstring) const {return (int)find(strSubstring);} int nextIndexOf(const AffxString& strSubstring, int iIndex) const; int lastIndexOf(char ch) const {return (int)rfind(ch);} AffxString& operator+(char ch) {*this += ch; return *this;} AffxString& operator+(int i) { *this += ::getInt(i); return *this; } AffxString removeSurroundingQuotes(); bool isBaseAt(int iIndex); bool isSnpAt(int iIndex); bool isIUPACCodeAt(int iIndex) {return ((isBaseAt(iIndex)) || (isSnpAt(iIndex)));} AffxString getSnpStringAt(int iIndex); double calculateMeltingTemp(); double calculateMeltingTempWithSNPs(); bool isBiAllelic(); bool isTriAllelic(); AffxString getIUPACCode(); AffxString getIUPACCodeRC(); double getGCContent(); unsigned int hash() const { unsigned int nHash = 0; for (unsigned int iIndex = 0; (iIndex < length()); iIndex++) { nHash = (nHash<<5) + nHash + getAt(iIndex); } return nHash; } int compareNoCase(const AffxString& str) const {return CompareNoCase(str);} int CompareNoCase(const AffxString& str) const { AffxString strThis = *this; AffxString strThat = str; strThis.toLowerCase(); strThat.toLowerCase(); return strThis.compareTo(strThat, 0); } template struct ComparePred { bool operator()(const AffxString* lhs, const AffxString* rhs) const { Err::errAbort("AffxString: ComparePred instantiated with an invalid compare code = " + ToStr(k)); return false; } }; }; template<> struct AffxString::ComparePred<0> { bool operator()(const AffxString* lhs, const AffxString* rhs) const { return *lhs < *rhs; } }; inline AffxString::AffxString() : std::string() {} inline AffxString::AffxString(const std::string& stringSrc) : std::string(stringSrc) {} inline AffxString::AffxString(const AffxString& stringSrc) : std::string(stringSrc) {} inline AffxString::AffxString(char ch, int nRepeat) : std::string(nRepeat, ch) {} inline AffxString::AffxString(const char* psz) : std::string(psz) {} inline AffxString::AffxString(const 
char* pch, int nLength) : std::string(pch, nLength) {} inline bool AffxString::isTab (int nIndex) const // Non Zero if ==, 0 if != { return getAt(nIndex) == _TAB_CHAR; } inline bool AffxString::isAlphabetic (int nIndex) const { return isalpha(getAt(nIndex)) != 0; } inline bool AffxString::isAlphanumeric (int nIndex) const { return isalnum(getAt(nIndex)) != 0; } inline bool AffxString::isWhiteSpace (int nIndex) const { return isspace(getAt(nIndex)) != 0; } inline bool AffxString::isBlank (int nIndex) const { return getAt(nIndex) == ' '; } inline bool AffxString::isPunctuation (int nIndex) const { return ispunct(getAt(nIndex)) != 0; } inline bool AffxString::isUppercase (int nIndex) const { return isupper(getAt(nIndex)) != 0; } inline bool AffxString::isLowercase (int nIndex) const { return islower(getAt(nIndex)) != 0; } inline bool AffxString::isNumeric (int nIndex) const { return isdigit(getAt(nIndex)) != 0; } inline bool AffxString::isHexNumeric (int nIndex) const { return isxdigit(getAt(nIndex)) != 0; } //===== Misc Functions ===== // inline void AffxString::toUpper (int nIndex) { setAt(nIndex, (char)toupper(getAt(nIndex))); } inline void AffxString::toLower (int nIndex) { setAt(nIndex, (char)tolower(getAt(nIndex))); } //===== Stripping Functions ===== // inline void AffxString::stripPrecedingBlanks () { stripPrecedingCharacters (' '); } inline void AffxString::stripTrailingBlanks () { stripTrailingCharacters (' '); } // string can't contain blank spaces inline bool AffxString::isBlankOrEmpty () { if (length() == 0 || (length() > 0 && isBlank())) return true; else return false; } inline bool AffxString::isNumber() { // empty string if (length() == 0) { return false; } bool bIsNumber = true; for (int i = 0; i < (int)length(); i++) { // must be a digit // must be a period and it must be in the middle // must be a minus sign and it must be at the beginning if (!isdigit(getAt(i)) && !(getAt(i) == '.' 
&& 0 < i && i < (int)length() - 1) && !(getAt(i) == '-' && i == 0) && !(getAt(i) == '+' && i == 0)) { bIsNumber = false; break; } } return bIsNumber; } #endif affxparser/src/fusion/util/AffxTime.cpp0000644000175200017520000003755114516003651021227 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "util/AffxTime.h" // #include #include #include #include #include #ifdef _MSC_VER // dont warn about some funcs... 
#define _CRT_SECURE_NO_WARNINGS #endif #define MIN_DATE (-657434L) // about year 100 #define MAX_DATE 2958465L // about year 9999 // Half a second, expressed in days #define HALF_SECOND (1.0/172800.0) // One-based array of days in year at month start static int rgMonthDays[13] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}; long AffxTime::m_lStartTime = 0; long AffxTime::m_lStopTime = 0; AffxTime::AffxTime() {empty();} AffxTime::AffxTime(const AffxTime& timeSrc) { m_time = timeSrc.m_time;} AffxTimeSpan::AffxTimeSpan() { } ///////////////////////////////////////////////////////////////////////////// // AffxTime - absolute time AffxTime::AffxTime(time_t time) { double dtSrc = (double)time / 24.0 / 60.0 / 60.0; tm tm; memset((void*)&tm, 0, sizeof(tm)); tm.tm_isdst = -1; // The legal range does not actually span year 0 to 9999. if (dtSrc > MAX_DATE || dtSrc < MIN_DATE) // about year 100 to about 9999 return; int nDays; // Number of days since Dec. 30, 1899 int nDaysAbsolute; // Number of days since 1/1/0 int nSecsInDay; // Time in seconds since midnight int nMinutesInDay; // Minutes in day int n400Years; // Number of 400 year increments since 1/1/0 int n400Century; // Century within 400 year block (0,1,2 or 3) int n4Years; // Number of 4 year increments since 1/1/0 int n4Day; // Day within 4 year block // (0 is 1/1/yr1, 1460 is 12/31/yr4) int n4Yr; // Year within 4 year block (0,1,2 or 3) bool bLeap4 = true; // TRUE if 4 year block includes leap year double dblDate = dtSrc; // tempory serial date // If a valid date, then this conversion should not overflow nDays = (int)dblDate; // Round to the second dblDate += ((dtSrc > 0.0) ? HALF_SECOND : -HALF_SECOND); nDaysAbsolute = (int)dblDate + 693959L; // Add days from 1/1/0 to 12/30/1899 dblDate = fabs(dblDate); nSecsInDay = (int)((dblDate - floor(dblDate)) * 86400.); // Calculate the day of week (sun=1, mon=2...) // -1 because 1/1/0 is Sat. 
+1 because we want 1-based tm.tm_wday = (int)((nDaysAbsolute - 1) % 7L) + 1; // Leap years every 4 yrs except centuries not multiples of 400. n400Years = (int)(nDaysAbsolute / 146097L); // Set nDaysAbsolute to day within 400-year block nDaysAbsolute %= 146097L; // -1 because first century has extra day n400Century = (int)((nDaysAbsolute - 1) / 36524L); // Non-leap century if (n400Century != 0) { // Set nDaysAbsolute to day within century nDaysAbsolute = (nDaysAbsolute - 1) % 36524L; // +1 because 1st 4 year increment has 1460 days n4Years = (int)((nDaysAbsolute + 1) / 1461L); if (n4Years != 0) n4Day = (int)((nDaysAbsolute + 1) % 1461L); else { bLeap4 = false; n4Day = (int)nDaysAbsolute; } } else { // Leap century - not special case! n4Years = (int)(nDaysAbsolute / 1461L); n4Day = (int)(nDaysAbsolute % 1461L); } if (bLeap4) { // -1 because first year has 366 days n4Yr = (n4Day - 1) / 365; if (n4Yr != 0) n4Day = (n4Day - 1) % 365; } else { n4Yr = n4Day / 365; n4Day %= 365; } // n4Day is now 0-based day of year. Save 1-based day of year, year number tm.tm_yday = (int)n4Day + 1; tm.tm_year = n400Years * 400 + n400Century * 100 + n4Years * 4 + n4Yr; // Handle leap year: before, on, and after Feb. 29. if (n4Yr == 0 && bLeap4) { // Leap Year if (n4Day == 59) { /* Feb. 29 */ tm.tm_mon = 2; tm.tm_mday = 29; goto DoTime; } // Pretend it's not a leap year for month/day comp. if (n4Day >= 60) --n4Day; } // Make n4DaY a 1-based day of non-leap year and compute // month/day for everything but Feb. 29. 
++n4Day; // Month number always >= n/32, so save some loop time */ for (tm.tm_mon = (n4Day >> 5) + 1; n4Day > rgMonthDays[tm.tm_mon]; tm.tm_mon++); tm.tm_mday = (int)(n4Day - rgMonthDays[tm.tm_mon-1]); DoTime: if (nSecsInDay == 0) tm.tm_hour = tm.tm_min = tm.tm_sec = 0; else { tm.tm_sec = (int)nSecsInDay % 60L; nMinutesInDay = nSecsInDay / 60L; tm.tm_min = (int)nMinutesInDay % 60; tm.tm_hour = (int)nMinutesInDay / 60; } m_time = tm; } AffxTime::AffxTime(int nYear, int nMonth, int nDay, int nHour, int nMin, int nSec, int nDST) { struct tm atm; memset((void*)&atm, 0, sizeof(tm)); atm.tm_isdst = -1; atm.tm_sec = nSec; atm.tm_min = nMin; atm.tm_hour = nHour; if (nDay < 1 && nDay > 31) {Err::errAbort("AffxTime, invalid day value.");} atm.tm_mday = nDay; // _ASSERTE(nMonth >= 1 && nMonth <= 12); atm.tm_mon = nMonth; atm.tm_year = nYear; atm.tm_isdst = nDST; m_time = atm; // _ASSERTE(m_time != -1); // indicates an illegal input time } AffxTime AffxTime::getCurrentTime() // return the current system time { AffxTime time; tm tm; time_t lTime = ::time(NULL); tm = *localtime(&lTime); time.m_time = tm; // Do not free(ptm) as subsequent calls to this function will then fail. 
time.m_time.tm_mon++; if (time.m_time.tm_year < 50) time.m_time.tm_year = time.m_time.tm_year + 2000; else if (time.m_time.tm_year < 100) time.m_time.tm_year = time.m_time.tm_year + 1900; else if (time.m_time.tm_year > 100) time.m_time.tm_year = time.m_time.tm_year -100 + 2000; return time; } // return runtime string -- timeElapsed = clock() - lStart AffxString AffxTime::getRuntime(AffxString strAction, double timeElapsed) { double milliElapsed = timeElapsed / CLOCKS_PER_SEC * 1000; double MILLISECONDS_PER_HOUR = 60 * 60 * 1000; double MILLISECONDS_PER_MINUTE = 60 * 1000; double MILLISECONDS_PER_SECOND = 1000; AffxString strTime; if (timeElapsed > MILLISECONDS_PER_HOUR) { strTime.sprintf("%.1lf hours", milliElapsed / MILLISECONDS_PER_HOUR); } else if (timeElapsed > MILLISECONDS_PER_MINUTE) { strTime.sprintf("%.1lf minutes", milliElapsed / MILLISECONDS_PER_MINUTE); } else { strTime.sprintf("%.1lf seconds", milliElapsed / MILLISECONDS_PER_SECOND); } return (strAction + " = " + strTime); } void AffxTime::startTime() { #ifdef WIN32 m_lStartTime = clock(); #else m_lStartTime = time(NULL); #endif } AffxString AffxTime::getRuntime(AffxString strAction) { #ifdef WIN32 m_lStopTime = clock(); return AffxTime::getRuntime(strAction, m_lStopTime - m_lStartTime); #else m_lStopTime = time(NULL); double timeElapsed = m_lStopTime - m_lStartTime; double SECONDS_PER_HOUR = 60 * 60; double SECONDS_PER_MINUTE = 60; AffxString strTime; if (timeElapsed > SECONDS_PER_HOUR) { strTime.sprintf("%.1lf hours", timeElapsed / SECONDS_PER_HOUR); } else if (timeElapsed > SECONDS_PER_MINUTE) { strTime.sprintf("%.1lf minutes", timeElapsed / SECONDS_PER_MINUTE); } else { strTime.sprintf("%.1lf seconds", timeElapsed); } return (strAction + " = " + strTime); #endif } ///////////////////////////////////////////////////////////////////////////// // AffxTimeSpan - relative time ///////////////////////////////////////////////////////////////////////////// // String formatting #define maxTimeBufferSize 128 
// Verifies will fail if the needed buffer size is too large AffxString AffxTimeSpan::format(char* pFormat) const // formatting timespans is a little trickier than formatting AffxTimes // * we are only interested in relative time formats, ie. it is illegal // to format anything dealing with absolute time (i.e. years, months, // day of week, day of year, timezones, ...) // * the only valid formats: // %D - # of days -- NEW !!! // %H - hour in 24 hour format // %M - minute (0-59) // %S - seconds (0-59) // %% - percent sign { char szBuffer[maxTimeBufferSize]; char ch; char* pch = szBuffer; while ((ch = *pFormat++) != '\0') { if (ch == '%') { switch (ch = *pFormat++) { default: Err::errAbort("AffxTimeSpan, Bad format character."); // probably a bad format character case '%': *pch++ = ch; break; case 'D': pch += sprintf(pch, "%d", getDays()); break; case 'H': pch += sprintf(pch, "%02d", getHours()); break; case 'M': pch += sprintf(pch, "%02d", getMinutes()); break; case 'S': pch += sprintf(pch, "%02d", getSeconds()); break; } } else { *pch++ = ch; } } *pch = '\0'; return szBuffer; } void AffxTime::empty() { struct tm atm; memset((void*)&atm, 0, sizeof(tm)); atm.tm_isdst = -1; m_time = atm; } void AffxTimeSpan::empty() { m_timeSpan = 0; } // Calculate the DATE value. AffxTimeSpan::AffxTimeSpan(double dt) { // Convert from days to total seconds. m_timeSpan = (int)(dt * 24 * 60 * 60); } // Calculate the DATE value. double AffxTimeSpan::getDATE(void) { double dt = 0; int lDays = getDays(); int nHours = getHours(); int nMinutes = getMinutes(); int nSeconds = getSeconds(); // Set date span by breaking into fractional days (all input ranges valid) dt = lDays + ((double)nHours)/24 + ((double)nMinutes)/(24*60) + ((double)nSeconds)/(24*60*60); return dt; } // Calculate the DATE value. 
double AffxTime::getDATE(void) { double dt = 0; int wYear = getYear(); int wMonth = getMonth(); int wDay = getDay(); int wHour = getHour(); int wMinute = getMinute(); int wSecond = getSecond(); // Validate year and month (ignore day of week and milliseconds) if (wYear > 9999 || wMonth < 1 || wMonth > 12) return 0; // Check for leap year and set the number of days in the month bool bLeapYear = ((wYear & 3) == 0) && ((wYear % 100) != 0 || (wYear % 400) == 0); int nDaysInMonth = rgMonthDays[wMonth] - rgMonthDays[wMonth-1] + ((bLeapYear && wDay == 29 && wMonth == 2) ? 1 : 0); // Finish validating the date if (wDay < 1 || wDay > nDaysInMonth || wHour > 23 || wMinute > 59 || wSecond > 59) { return 0; } // Cache the date in days and time in fractional days int nDate; double dblTime; //It is a valid date; make Jan 1, 1AD be 1 nDate = wYear*365L + wYear/4 - wYear/100 + wYear/400 + rgMonthDays[wMonth-1] + wDay; // If leap year and it's before March, subtract 1: if (wMonth <= 2 && bLeapYear) --nDate; // Offset so that 12/30/1899 is 0 nDate -= 693959L; dblTime = (((int)wHour * 3600L) + // hrs in seconds ((int)wMinute * 60L) + // mins in seconds ((int)wSecond)) / 86400.; dt = (double) nDate + ((nDate >= 0) ? dblTime : -dblTime); return dt; } // Constructor for DATE. AffxTime::AffxTime(double dtSrc) { tm tm; memset((void*)&tm, 0, sizeof(tm)); tm.tm_isdst = -1; // The legal range does not actually span year 0 to 9999. if (dtSrc > MAX_DATE || dtSrc < MIN_DATE) // about year 100 to about 9999 return; int nDays; // Number of days since Dec. 
30, 1899 int nDaysAbsolute; // Number of days since 1/1/0 int nSecsInDay; // Time in seconds since midnight int nMinutesInDay; // Minutes in day int n400Years; // Number of 400 year increments since 1/1/0 int n400Century; // Century within 400 year block (0,1,2 or 3) int n4Years; // Number of 4 year increments since 1/1/0 int n4Day; // Day within 4 year block // (0 is 1/1/yr1, 1460 is 12/31/yr4) int n4Yr; // Year within 4 year block (0,1,2 or 3) bool bLeap4 = true; // TRUE if 4 year block includes leap year double dblDate = dtSrc; // tempory serial date // If a valid date, then this conversion should not overflow nDays = (int)dblDate; // Round to the second dblDate += ((dtSrc > 0.0) ? HALF_SECOND : -HALF_SECOND); nDaysAbsolute = (int)dblDate + 693959L; // Add days from 1/1/0 to 12/30/1899 dblDate = fabs(dblDate); nSecsInDay = (int)((dblDate - floor(dblDate)) * 86400.); // Calculate the day of week (sun=1, mon=2...) // -1 because 1/1/0 is Sat. +1 because we want 1-based tm.tm_wday = (int)((nDaysAbsolute - 1) % 7L) + 1; // Leap years every 4 yrs except centuries not multiples of 400. n400Years = (int)(nDaysAbsolute / 146097L); // Set nDaysAbsolute to day within 400-year block nDaysAbsolute %= 146097L; // -1 because first century has extra day n400Century = (int)((nDaysAbsolute - 1) / 36524L); // Non-leap century if (n400Century != 0) { // Set nDaysAbsolute to day within century nDaysAbsolute = (nDaysAbsolute - 1) % 36524L; // +1 because 1st 4 year increment has 1460 days n4Years = (int)((nDaysAbsolute + 1) / 1461L); if (n4Years != 0) n4Day = (int)((nDaysAbsolute + 1) % 1461L); else { bLeap4 = false; n4Day = (int)nDaysAbsolute; } } else { // Leap century - not special case! n4Years = (int)(nDaysAbsolute / 1461L); n4Day = (int)(nDaysAbsolute % 1461L); } if (bLeap4) { // -1 because first year has 366 days n4Yr = (n4Day - 1) / 365; if (n4Yr != 0) n4Day = (n4Day - 1) % 365; } else { n4Yr = n4Day / 365; n4Day %= 365; } // n4Day is now 0-based day of year. 
Save 1-based day of year, year number tm.tm_yday = (int)n4Day + 1; tm.tm_year = n400Years * 400 + n400Century * 100 + n4Years * 4 + n4Yr; // Handle leap year: before, on, and after Feb. 29. if (n4Yr == 0 && bLeap4) { // Leap Year if (n4Day == 59) { /* Feb. 29 */ tm.tm_mon = 2; tm.tm_mday = 29; goto DoTime; } // Pretend it's not a leap year for month/day comp. if (n4Day >= 60) --n4Day; } // Make n4DaY a 1-based day of non-leap year and compute // month/day for everything but Feb. 29. ++n4Day; // Month number always >= n/32, so save some loop time */ for (tm.tm_mon = (n4Day >> 5) + 1; n4Day > rgMonthDays[tm.tm_mon]; tm.tm_mon++); tm.tm_mday = (int)(n4Day - rgMonthDays[tm.tm_mon-1]); DoTime: if (nSecsInDay == 0) tm.tm_hour = tm.tm_min = tm.tm_sec = 0; else { tm.tm_sec = (int)nSecsInDay % 60L; nMinutesInDay = nSecsInDay / 60L; tm.tm_min = (int)nMinutesInDay % 60; tm.tm_hour = (int)nMinutesInDay / 60; } m_time = tm; } // Format the date into a string. AffxString AffxTime::getTimeStampString(void) { AffxString str; if (!isEmpty()) { int iMonth = getMonth(); int iDay = getDay(); int iYear = getYear(); int iHour = getHour(); int iMinute = getMinute(); int iSecond = getSecond(); char szBuffer[64]; sprintf(szBuffer, "%02d/%02d/%d %02d:%02d:%02d", iMonth, iDay, iYear, iHour, iMinute, iSecond); str = szBuffer; } return str; } // Format the date into a string. AffxString AffxTime::getDateString(void) { AffxString str; if (!isEmpty()) { int iMonth = getMonth(); int iDay = getDay(); int iYear = getYear(); char szBuffer[64]; sprintf(szBuffer, "%d/%d/%d", iMonth, iDay, iYear); str = szBuffer; } return str; } // Format the time into a string. 
AffxString AffxTime::getHourMinuteString(void) { AffxString str; char szBuffer[64]; AffxString strAmPm = " AM"; if (!isEmpty()) { int iHour = getHour(); int iMinute = getMinute(); if (iHour >= 12) //pjo 13-Jan-97 equals sign added strAmPm = " PM"; if (iHour >= 13) //pjo 13-Jan-97 hour adjusted at 1 PM iHour -= 12; if (iHour == 0) iHour = 12; sprintf(szBuffer, "%d:", iHour); str = szBuffer; sprintf(szBuffer, "%2d", iMinute); if (szBuffer[0] == ' ') szBuffer[0] = '0'; str += szBuffer; str += strAmPm; return str; } return str; } affxparser/src/fusion/util/AffxTime.h0000644000175200017520000001753314516003651020672 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _AffxTime_h_ #define _AffxTime_h_ #include "util/AffxString.h" // #include // ///////////////////////////////////////////////////////////////////////////// // AffxTimeSpan and AffxTime class AffxTime; class AffxTimeSpan { public: // Constructors AffxTimeSpan(); AffxTimeSpan(time_t time); AffxTimeSpan(int lDays, int nHours, int nMins, int nSecs); AffxTimeSpan(double dt); AffxTimeSpan(const AffxTimeSpan& timeSpanSrc); const AffxTimeSpan& operator=(const AffxTimeSpan& timeSpanSrc); // Attributes // extract parts void empty(void); double getDATE(void); int getDays() const; // total # of days int getTotalHours() const; int getHours() const; int getTotalMinutes() const; int getMinutes() const; int getTotalSeconds() const; int getSeconds() const; // Operations // time math AffxTimeSpan operator-(AffxTimeSpan timeSpan) const; AffxTimeSpan operator+(AffxTimeSpan timeSpan) const; const AffxTimeSpan& operator+=(AffxTimeSpan timeSpan); const AffxTimeSpan& operator-=(AffxTimeSpan timeSpan); bool operator==(AffxTimeSpan timeSpan) const; bool operator!=(AffxTimeSpan timeSpan) const; bool operator<(AffxTimeSpan timeSpan) const; bool operator>(AffxTimeSpan timeSpan) const; bool operator<=(AffxTimeSpan timeSpan) const; bool operator>=(AffxTimeSpan timeSpan) const; AffxString format(char* pFormat) const; private: time_t m_timeSpan; friend class AffxTime; }; class AffxTime { public: static AffxTime getCurrentTime(); static AffxString getRuntime(AffxString strAction, double timeElapsed); static void startTime(); static AffxString getRuntime(AffxString strAction); static long m_lStartTime; static long m_lStopTime; public: // Constructors AffxTime(); AffxTime(time_t time); AffxTime(int nYear, int 
nMonth, int nDay, int nHour, int nMin, int nSec, int nDST = -1); AffxTime(double dtSrc); AffxTime(const AffxTime& timeSrc); const AffxTime& operator=(const AffxTime& timeSrc); void clear(void) {empty();} // No memory allocation to worry about. void empty(void); bool isEmpty(void) {return getDATE() == 0;} bool isClear(void) {return isEmpty();} // Attributes double getDATE(void); time_t getTime(); int getYear() const; int getMonth() const; // month of year (1 = Jan) int getDay() const; // day of month int getHour() const; int getMinute() const; int getSecond() const; int getDayOfWeek() const; // 1=Sun, 2=Mon, ..., 7=Sat // Operations // time math AffxTimeSpan operator-(AffxTime time); AffxTime operator-(AffxTimeSpan timeSpan); AffxTime operator+(AffxTimeSpan timeSpan); const AffxTime& operator+=(AffxTimeSpan timeSpan); const AffxTime& operator-=(AffxTimeSpan timeSpan); bool operator==(AffxTime time); bool operator!=(AffxTime time); bool operator<(AffxTime time); bool operator>(AffxTime time); bool operator<=(AffxTime time); bool operator>=(AffxTime time); // formatting using "C" strftime AffxString format(char* pFormat); AffxString formatGmt(char* pFormat); AffxString getDateString(void); AffxString getHourMinuteString(void); AffxString getTimeStampString(void); private: tm m_time; }; // AffxTime and AffxTimeSpan inline AffxTimeSpan::AffxTimeSpan(time_t time) { m_timeSpan = time; } inline AffxTimeSpan::AffxTimeSpan(int lDays, int nHours, int nMins, int nSecs) { m_timeSpan = nSecs + 60* (nMins + 60* (nHours + 24* lDays)); } inline AffxTimeSpan::AffxTimeSpan(const AffxTimeSpan& timeSpanSrc) { m_timeSpan = timeSpanSrc.m_timeSpan; } inline const AffxTimeSpan& AffxTimeSpan::operator=(const AffxTimeSpan& timeSpanSrc) { m_timeSpan = timeSpanSrc.m_timeSpan; return *this; } inline int AffxTimeSpan::getDays() const { return (int)m_timeSpan / (24*3600L); } inline int AffxTimeSpan::getTotalHours() const { return (int)m_timeSpan/3600; } inline int AffxTimeSpan::getHours() const { 
return (int)(getTotalHours() - getDays()*24); } inline int AffxTimeSpan::getTotalMinutes() const { return (int)m_timeSpan/60; } inline int AffxTimeSpan::getMinutes() const { return (int)(getTotalMinutes() - getTotalHours()*60); } inline int AffxTimeSpan::getTotalSeconds() const { return (int)m_timeSpan; } inline int AffxTimeSpan::getSeconds() const { return (int)(getTotalSeconds() - getTotalMinutes()*60); } inline AffxTimeSpan AffxTimeSpan::operator-(AffxTimeSpan timeSpan) const { return AffxTimeSpan(m_timeSpan - timeSpan.m_timeSpan); } inline AffxTimeSpan AffxTimeSpan::operator+(AffxTimeSpan timeSpan) const { return AffxTimeSpan(m_timeSpan + timeSpan.m_timeSpan); } inline const AffxTimeSpan& AffxTimeSpan::operator+=(AffxTimeSpan timeSpan) { m_timeSpan += timeSpan.m_timeSpan; return *this; } inline const AffxTimeSpan& AffxTimeSpan::operator-=(AffxTimeSpan timeSpan) { m_timeSpan -= timeSpan.m_timeSpan; return *this; } inline bool AffxTimeSpan::operator==(AffxTimeSpan timeSpan) const { return m_timeSpan == timeSpan.m_timeSpan; } inline bool AffxTimeSpan::operator!=(AffxTimeSpan timeSpan) const { return m_timeSpan != timeSpan.m_timeSpan; } inline bool AffxTimeSpan::operator<(AffxTimeSpan timeSpan) const { return m_timeSpan < timeSpan.m_timeSpan; } inline bool AffxTimeSpan::operator>(AffxTimeSpan timeSpan) const { return m_timeSpan > timeSpan.m_timeSpan; } inline bool AffxTimeSpan::operator<=(AffxTimeSpan timeSpan) const { return m_timeSpan <= timeSpan.m_timeSpan; } inline bool AffxTimeSpan::operator>=(AffxTimeSpan timeSpan) const { return m_timeSpan >= timeSpan.m_timeSpan; } inline const AffxTime& AffxTime::operator=(const AffxTime& timeSrc) { m_time = timeSrc.m_time; return *this; } inline int AffxTime::getYear() const { return (m_time.tm_year); } inline int AffxTime::getMonth() const { return m_time.tm_mon; } inline int AffxTime::getDay() const { return m_time.tm_mday; } inline int AffxTime::getHour() const { return m_time.tm_hour; } inline int AffxTime::getMinute() 
const { return m_time.tm_min; } inline int AffxTime::getSecond() const { return m_time.tm_sec; } inline int AffxTime::getDayOfWeek() const { return m_time.tm_wday + 1; } inline AffxTimeSpan AffxTime::operator-(AffxTime time) { return getDATE() - time.getDATE(); } inline AffxTime AffxTime::operator-(AffxTimeSpan timeSpan) { return AffxTime(getDATE() - timeSpan.getDATE()); } inline AffxTime AffxTime::operator+(AffxTimeSpan timeSpan) { return AffxTime(getDATE() + timeSpan.getDATE()); } inline const AffxTime& AffxTime::operator+=(AffxTimeSpan timeSpan) { *this = getDATE() + timeSpan.getDATE(); return *this; } inline const AffxTime& AffxTime::operator-=(AffxTimeSpan timeSpan) { *this = getDATE() - timeSpan.getDATE(); return *this; } inline bool AffxTime::operator==(AffxTime time) { return getDATE() == time.getDATE(); } inline bool AffxTime::operator!=(AffxTime time) { return getDATE() != time.getDATE(); } inline bool AffxTime::operator<(AffxTime time) { return getDATE() < time.getDATE(); } inline bool AffxTime::operator>(AffxTime time) { return getDATE() > time.getDATE(); } inline bool AffxTime::operator<=(AffxTime time) { return getDATE() <= time.getDATE(); } inline bool AffxTime::operator>=(AffxTime time) { return getDATE() >= time.getDATE(); } #endif // _AffxTime_h_ affxparser/src/fusion/util/AptErrno.h0000644000175200017520000000260314516003651020711 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // k/apt-1/affy/sdk/util/AptErrno.h --- // // $Id$ // #ifndef _UTIL_APTERRNO_H_ #define _UTIL_APTERRNO_H_ /// Return codes for methods. enum AptErr_t { // dont use negative values as some systems mess up the signs. APT_OK=0, // a generic error, for when there isnt a more specific one. APT_ERR=1000, // APT_ERR_ISNULL, APT_ERR_NOTFOUND, APT_ERR_OUTOFBOUNDS, // APT_ERR_WRONGTYPE, // APT_ERR_ACCESS, APT_ERR_EXISTS, APT_ERR_FS_PERM, APT_ERR_FS_STAT, APT_ERR_NOTEXISTS, APT_ERR_NOTREMOVED, }; #endif // _UTIL_APTERRNO_H_ affxparser/src/fusion/util/AptVersionInfo.h0000644000175200017520000001064014516003651022065 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2010 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _APTVERSIONINFO_H_ #define _APTVERSIONINFO_H_ #include #include "util/Convert.h" // After including this file, all these will be defined. 
// They will probably be "unknown" unless AptVersionInfoGenerated.h
// is around or they have been defined with "-D" on the command line.
// In any event, they will be safe to use as strings.

// "util/AptVersionInfoGenerated.h" should be generated by a shell script,
// but we don't at the moment.
// If APT_HAVE_APTVERSIONINFOGENERATED is set then we will pick it up.
// The problem with generating the file is we don't want to
// generate it each time we do an edit/compile cycle.
// But in a Bamboo build, the data will be static.
#ifdef APT_HAVE_APTVERSIONINFOGENERATED
#include "util/AptVersionInfoGenerated.h"
#endif

// These are the backstops to make sure everything is defined
// and to document the formats you can expect.

// "APTHEADCI-HEADCIAMD64PCLINUX-397"
#ifndef APT_VER_BAMBOO_BUILD
#define APT_VER_BAMBOO_BUILD "na"
#endif

// "4.3.2 20081105 (Red Hat 4.3.2-7)"
#ifndef APT_VER_COMPILE_CC_VERSION
// ANSI says this should be defined, but check anyway.
#ifdef __VERSION__
#define APT_VER_COMPILE_CC_VERSION __VERSION__
#else
#define APT_VER_COMPILE_CC_VERSION "unknown"
#endif
#endif

// "20091210-1501"
#ifndef APT_VER_COMPILE_DATE
#define APT_VER_COMPILE_DATE "unknown"
#endif

// "ostia", "parama"
#ifndef APT_VER_COMPILE_HOST
#define APT_VER_COMPILE_HOST "unknown"
#endif

// output from "uname"
// "Linux", "Darwin"
#ifndef APT_VER_COMPILE_OS
#define APT_VER_COMPILE_OS "unknown"
#endif

// output from "id"
// "rsatin", "harley"
#ifndef APT_VER_COMPILE_USER
#define APT_VER_COMPILE_USER "unknown"
#endif

// output from "uname -a"
// Linux leno.ev.affymetrix.com 2.6.27.25-170.2.72.fc10.x86_64 #1 SMP Sun Jun 21 18:39:34
#ifndef APT_VER_COMPILE_OS_VERSION
#define APT_VER_COMPILE_OS_VERSION "unknown"
#endif

// "1.10.2" "1.12.0"
#ifndef APT_VER_RELEASE
#define APT_VER_RELEASE "unknown"
#endif

// the output from "svnversion"
// See "svnversion --help" for how to interpret this number.
// "r12140M" #ifndef APT_VER_SVN_VERSION #define APT_VER_SVN_VERSION "unknown" #endif // where the checkout is rooted. // from "svn info" but not currently captured. // "svn://svn.ev.affymetrix.com/projects/apt/trunk/affy/sdk" #ifndef APT_VER_SVN_URL #define APT_VER_SVN_URL "unknown" #endif class AptVersionInfo { public: static std::string version() { return ToStr(APT_VER_RELEASE); } static std::string cvsId() { return (ToStr(APT_VER_SVN_URL) + " " + ToStr(APT_VER_SVN_VERSION)); } static std::string versionToReport() { return version() + " " + cvsId() + " " + ToStr(APT_VER_BAMBOO_BUILD); } static std::string reportBambooBuild() { return ToStr(APT_VER_BAMBOO_BUILD); } static std::string reportCompileCCVersion() { return ToStr(APT_VER_COMPILE_CC_VERSION); } static std::string reportCompileDate() { return ToStr(APT_VER_COMPILE_DATE); } static std::string reportCompileHost() { return ToStr(APT_VER_COMPILE_HOST); } static std::string reportCompileOS() { return ToStr(APT_VER_COMPILE_OS); } static std::string reportRelease() { return ToStr(APT_VER_RELEASE); } static std::string reportSVNVersion() { return ToStr(APT_VER_SVN_VERSION); } static std::string reportSVNURL() { return ToStr(APT_VER_SVN_URL); } }; #endif // _APTVERSIONINFO_H_ affxparser/src/fusion/util/BaseEngine.cpp0000644000175200017520000004144714516003651021523 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "util/BaseEngine.h" // #include "calvin_files/utils/src/StringUtils.h" #include "util/AffxConv.h" #include "util/AptVersionInfo.h" #include "util/Fs.h" #include "util/MsgSocketHandler.h" using namespace std; /* * The head of the engine data objects. */ EngineReg *EngineReg::m_Head = NULL; /* * Set the members. The head is the pointer to this class. */ EngineReg::EngineReg(const std::string &engName) : m_Next(m_Head) { name = engName; m_Head = this; } /* * Destroy the class. */ EngineReg::~EngineReg() { } /* * Create an engine based on the name. */ BaseEngine *EngineReg::CreateEngine(const std::string &name) { // Find the matching engine data object. for (EngineReg *p=m_Head; p != NULL; p = p->m_Next) { if (p->name == name) { BaseEngine *engine = p->MakeObject(); if (engine) { return engine; } } } return NULL; } /* * Gets the list of all registered engines. 
*/ std::list EngineReg::GetEngineNames() { std::list names; for (EngineReg *p=m_Head; p != NULL; p = p->m_Next) { names.push_back(p->name); } return names; } BaseEngine::BaseEngine() : m_OptionsChecked(false), m_DiskChecked(false), m_created_new_tempdir(false) { defineOptions(); m_SocketHandler = NULL; /* Track our memory usage */ uint64_t freeRam = 0, totalRam = 0, swapAvail = 0, memAvail = 0; Util::memInfo(freeRam, totalRam, swapAvail, memAvail, false); setOpt("free-mem-at-start",ToStr(memAvail)); Verbose::out(3,"In Base Engine Constructor"); } BaseEngine::BaseEngine( char * argv[] ) : m_OptionsChecked(false), m_DiskChecked(false), m_created_new_tempdir(false) { m_SocketHandler = NULL; parseArgv(argv); } BaseEngine::~BaseEngine() { Freez(m_SocketHandler); Verbose::out(3,"In Base Engine Destructor"); } /** * Parse and set options from argv * * @param argv - arg vector */ int BaseEngine::parseArgv( const char * const * const argv, int start ) { int rv = Options::parseArgv( argv, start ); if (getOptBool("console-off")) { Verbose::Param p = Verbose::getParam(); // To trigger setting up defaults. 
Verbose::removeDefault(); } if (!getOpt("use-socket").empty()) { Verbose::out(1, "Setting up socket for reporting."); string url = getOpt("use-socket"); string host = url.substr(0, url.find(':')); string port = url.substr(url.find(':') + 1); int verbosity = getOptInt("verbose"); m_SocketHandler = new MsgSocketHandler(verbosity); #ifndef _MSC_VER m_SocketHandler->setEndOfLine("\n"); #endif m_SocketHandler->openSocket(host, port); Verbose::pushMsgHandler(m_SocketHandler); Verbose::pushProgressHandler(m_SocketHandler); Err::configureErrHandler(true, true, true, 0); } return rv; } std::string BaseEngine::getProgName() { return Options::getProgName(); } void BaseEngine::setLibFileOpt(const std::string &option) { setOpt(option,Fs::findLibFile(getOpt(option), getOpt("analysis-files-path"))); } void BaseEngine::openStandardLog(const std::string& log_name, std::ofstream& log_ofstream, LogStream& log_logstream) { if (!Fs::isWriteableDir(getOpt("out-dir"))) { if (Fs::mkdirPath(getOpt("out-dir")) != APT_OK) { Err::errAbort("Can't make or write to directory: '"+getOpt("out-dir")+"'"); } } std::string log_path; if (getOpt("log-file") != "") { log_path = getOpt("log-file"); } else { log_path = Fs::join(getOpt("out-dir"),log_name); } Fs::mustOpenToWrite(log_ofstream, log_path); log_logstream.setStream(&log_ofstream); Verbose::pushMsgHandler(&log_logstream); Verbose::pushProgressHandler(&log_logstream); Verbose::pushWarnHandler(&log_logstream); } std::vector > BaseEngine::getMetaDataDescription() { vector > pairVec; PgOpt *meta = getPgOpt("meta-data-info"); for (int i = 0; i < meta->getValueCount(); i++) { pair p; string param = meta->getValue(i); size_t pos = param.find('='); if (pos == string::npos) { Err::errAbort("meta-data-info value not in key=value pair: '" + param + "'"); } p.first = param.substr(0,pos); p.second = param.substr(pos+1); if (p.first.length() == 0) { Err::errAbort("meta-data-info value has empty key in key=value pair: '" + param + "'"); } if (p.second.length() 
== 0) { Err::errAbort("meta-data-info value has empty value in key=value pair: '" + param + "'"); } // APT-510: we dont want to enforce this as it prevents having "="s in filenames. // if (p.second.find('=') != string::npos) { // Err::errAbort("meta-data-info value has multiple '=' delimiters in key=value pair: '" + param + "'"); // } pairVec.push_back(p); } return pairVec; } void BaseEngine::checkOptions() { /* @todo commenting out until implementation finished setVersionOptions(); if (getOptBool("report-version-information")) { printEngineOptions("ReportVersionInformation "); Err::errAbort("The --report-version-information has been invoked. This will give you a listing of all options, including version data, and then cause the program to exit. To restore normal functionality remove this option from the command line input." ); } */ if (m_OptionsChecked) return; // Note what options we started with snapshotOptions("initial"); // Define additional state values -- those configured by base engine // The children will cal their defineStates() in checkOptionsImp() BaseEngine::defineStates(); if (getOptBool("throw-exception")) { Err::setThrowStatus(true); } // does the user want the version if (getOptBool("version")) { std::cout << "version: " << getOpt("version-to-report") << std::endl; exit(0); } // Do we need help? (I know I do...) 
else if (getOptBool("help")) { std::set toHide; optionUsage(toHide, true); extraHelp(); std::cout << "version: " << getOpt("version-to-report") << std::endl; exit(0); } checkOptionsImp(); m_OptionsChecked = true; if (getOptBool("throw-exception")) { ErrHandler *handler = Err::popHandler(); Freez(handler); } } /* @todo commenting out until implementation finished void BaseEngine::setVersionOptions(){ AptVersionInfo aptVersionInfo; setOpt("BAMBOO_BUILD", aptVersionInfo.reportBambooBuild()); setOpt("COMPILE_CC_VERSION", aptVersionInfo.reportCompileCCVersion()); setOpt("COMPILE_DATE", aptVersionInfo.reportCompileDate()); setOpt("COMPILE_HOST", aptVersionInfo.reportCompileHost()); setOpt("COMPILE_OS", aptVersionInfo.reportCompileOS()); setOpt("RELEASE", aptVersionInfo.reportRelease()); setOpt("SVN_VERSION", aptVersionInfo.reportSVNVersion()); setOpt("SVN_URL", aptVersionInfo.reportSVNVersion()); } */ void BaseEngine::checkDiskSpace() { checkOptions(); if (m_DiskChecked) return; if (getOptBool("throw-exception")) { Err::setThrowStatus(true); } checkDiskSpaceImp(); m_DiskChecked = true; if (getOptBool("throw-exception")) { ErrHandler *handler = Err::popHandler(); Freez(handler); } } /** * @brief Top level function to open up files and make sure things look ok then * start analysis pathway. */ void BaseEngine::run() { if (getOptBool("throw-exception")) { Err::setThrowStatus(true); } Verbose::out(1, "Running " + getEngineName() + "..."); printEngineOptions("Initial "); /* Check our options */ checkOptions(); checkDiskSpace(); /* Set some initial state info */ setOpt("time-start",Util::getTimeStamp()); time_t startTime = time(NULL); /* Do the analysis requested. 
*/ Verbose::out(3,"Base Engine Before runImp()"); Util::pushMemFreeAtStart(); runImp(); Util::popMemFreeAtStart(); Verbose::out(3,"Base Engine After runImp()"); setOpt("time-end",Util::getTimeStamp()); time_t endTime = time(NULL); int t = int( (float)(endTime - startTime) / 60.0 * 100); // convert to minutes setOpt("time-run-minutes",ToStr((float)t/100)); Verbose::out(1, ToStr("Run took approximately: ") + ToStr((float)t/100) + ToStr(((float)t/100) > 1 ? " minutes." : " minute.")); printEngineOptions("Final "); Verbose::out(1, "Done running " + getEngineName() + "."); if (getOptBool("throw-exception")) { ErrHandler *handler = Err::popHandler(); Freez(handler); } if (m_SocketHandler != NULL) { Verbose::removeMsgHandler(m_SocketHandler); Verbose::removeProgressHandler(m_SocketHandler); m_SocketHandler->finishedMsg(); Freez(m_SocketHandler); } } void BaseEngine::defineOptions() { defineOptionSection("Common Options (not used by all programs)"); defineOption("h", "help", PgOpt::BOOL_OPT, "Display program options and extra documentation about possible analyses. " "See -explain for information about a specific operation.", "false"); defineOption("v", "verbose", PgOpt::INT_OPT, "How verbose to be with status messages 0 - quiet, 1 - usual messages, 2 - more messages.", "1"); defineOption("", "console-off", PgOpt::BOOL_OPT, "Turn off the default messages to the console but not logging or sockets.", "false"); defineOption("", "use-socket", PgOpt::STRING_OPT, "Host and port to print messages over in localhost:port format", ""); defineOption("", "version", PgOpt::BOOL_OPT, "Display version information.", "false"); defineOption("f", "force", PgOpt::BOOL_OPT, "Disable various checks including chip types. Consider using --chip-type option rather than --force.", "false"); defineOption("", "throw-exception", PgOpt::BOOL_OPT, "Throw an exception rather than calling exit() on error. Useful for debugging. This option is intended for command line use only. 
If you are wrapping an Engine and want exceptions thrown, then you should call Err::setThrowStatus(true) to ensure that all Err::errAbort() calls result in an exception.", "false"); defineOption("", "analysis-files-path", PgOpt::STRING_OPT, "Search path for analysis library files. Will override AFFX_ANALYSIS_FILES_PATH environment variable.", ""); defineOption("", "xml-file", PgOpt::STRING_OPT, "Input parameters in XML format (Will override command line settings).", ""); defineOption("","temp-dir", PgOpt::STRING_OPT, "Directory for temporary files when working off disk. Using network mounted drives is not advised. When not set, the output folder will be used. The defaut is typically the output directory or the current working directory.", ""); defineOption("o","out-dir", PgOpt::STRING_OPT, "Directory for output files. Defaults to current working directory.", "."); defineOption("","log-file", PgOpt::STRING_OPT, "The name of the log file. Generally defaults to the program name in the out-dir folder.", ""); defineOptionSection("Engine Options (Not used on command line)"); defineOption("","command-line", PgOpt::STRING_OPT, "The command line executed.", ""); defineOption("","exec-guid", PgOpt::STRING_OPT, "The GUID for the process.", ""); defineOption("","program-name", PgOpt::STRING_OPT, "The name of the program", ""); defineOption("","program-company", PgOpt::STRING_OPT, "The company providing the program", ""); defineOption("","program-version", PgOpt::STRING_OPT, "The version of the program", ""); defineOption("","program-cvs-id", PgOpt::STRING_OPT, "The CVS version of the program", ""); defineOption("","version-to-report", PgOpt::STRING_OPT, "The version to report in the output files.", ""); defineOption("", "free-mem-at-start", PgOpt::STRING_OPT, "How much physical memory was available when the engine run started.", "0"); defOptMult("","meta-data-info", PgOpt::STRING_OPT, "Meta data in key=value pair that will be output in headers.", ""); /* @todo commenting out 
until implementation finished defineOption("", "report-version-information", PgOpt::BOOL_OPT, "Reports version information, command line options and exits.", "false"); // The values that the following states are set to are created during the build process. // The information is initilized from environment variables in sdk/Makefile.defs and passed to the compiler // with the -D compiler option. The preprocessor run then sets them within the AptVersionInfo.h file // to the values set in the Makefile.defs. They are then available to the apt code base via the access // functions defined in AptVersionInfo.h // Note that all the names of the states are the names given in the #define's in the AptVersionInfo.h file // with the APT_VER prefix removed. This avoids any preprocessor overwrites. defineOption("","BAMBOO_BUILD", PgOpt::STRING_OPT, "The bamboo version used while building the binary", ""); defineOption("","COMPILE_CC_VERSION", PgOpt::STRING_OPT, "The version of the compiler used while building the binary.", ""); defineOption("","COMPILE_DATE", PgOpt::STRING_OPT, "The date on which the binary was built.", ""); defineOption("","COMPILE_HOST", PgOpt::STRING_OPT, "The host on which the binary was built.", ""); defineOption("","COMPILE_OS", PgOpt::STRING_OPT, "The operating system on which the binary was built.", ""); defineOption("","RELEASE", PgOpt::STRING_OPT, "The apt version eg. 
1.12.0 set when the binary was built.", ""); defineOption("","SVN_VERSION", PgOpt::STRING_OPT, "The version number of the SVN repository from which the binary was built.", ""); defineOption("","SVN_URL", PgOpt::STRING_OPT, "The URL at which the SVN repository is rooted.", ""); */ } void BaseEngine::defineStates() { defineOption("", "time-start", PgOpt::STRING_OPT, "The time the engine run was started", ""); defineOption("", "time-end", PgOpt::STRING_OPT, "The time the engine run ended", ""); defineOption("", "time-run-minutes", PgOpt::STRING_OPT, "The run time in minutes.", ""); defineOption("","analysis-guid", PgOpt::STRING_OPT, "The GUID for the analysis run.", ""); } ////////// // @todo this is wrong "m_created_new_tempdir" is object state, // but the temp_dir path is being passed in; // temp_dir should be state as well. void BaseEngine::makeTempDir(std::string temp_dir) { m_created_new_tempdir=false; if (Fs::dirExists(temp_dir)==false) { m_created_new_tempdir=true; Fs::ensureWriteableDirPath(temp_dir); } if (Fs::isWriteableDir(temp_dir)==false) { Err::errAbort("Can't make or write to directory: "+FS_QUOTE_PATH(temp_dir)); } } void BaseEngine::removeTempDir(std::string temp_dir) { if (m_created_new_tempdir) { Fs::rmdir(temp_dir); } } affxparser/src/fusion/util/BaseEngine.h0000644000175200017520000001146614516003651021166 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file BaseEngine.h * @author Alan Williams * @date Mon Jun 23 14:57:34 PDT 2008 * * @brief base engine base class */ #ifndef _BASEENGINE_H_ #define _BASEENGINE_H_ // #include "portability/apt-win-dll.h" #include "util/LogStream.h" #include "util/Options.h" #include "util/PgOptions.h" #include "util/Util.h" // #include #include #include #include #include class MsgSocketHandler; /** @brief Base class for analysis engines */ class APTLIB_API BaseEngine : public Options { public: virtual std::string getEngineName() { return "BaseEngine"; } /** * Constructor */ BaseEngine(); /** * Constructor * @param argv - vector of args to parse */ BaseEngine ( char * argv[] ); /** * Destructor */ virtual ~BaseEngine(); /** * Run the engine -- entry point to run the engine * Will call Verbose::out() and Err::errAbort(). */ void run(); /** * Parse and set options from argv * * @param argv - arg vector */ virtual int parseArgv( const char * const * const argv, int start = 1 ); // @todo commenting out until implementation finished // /** // * Set the build versioning infomation. // * // */ // void setVersionOptions(); /** * Get the name of the Engine */ std::string getProgName(); /** * Check the validity of options specified */ void checkOptions(); /** * Query available disk space and compare it to estimated disk space * needed for temporary and CHP files */ void checkDiskSpace(); /** * Opens the log file in the normal way. * Cant take ownership of these as we done know when to close them. 
*/ void openStandardLog(const std::string& log_name,std::ofstream& log_ofstream,LogStream& log_logstream); /** * Swap the option value to include the full path if found * @param option */ void setLibFileOpt(const std::string &option); /** * Get a vector of the key=value pairs specified via the "meta-data-info" * parameters * @return vector of pairs of strings with first string being key and second being the value */ std::vector< std::pair > getMetaDataDescription(); virtual void defineOptions(); protected: /** * Engine implementations in child * Will call Verbose::out() and Err::errAbort(). */ virtual void runImp() {} virtual void checkOptionsImp() {} virtual void checkDiskSpaceImp() {} virtual void extraHelp() {} void printEngineOptions() { printOptions(getEngineName() + ": "); } void printEngineOptions(const std::string &label) { printOptions(getEngineName() + ": " + label); } void makeTempDir(std::string temp_dir); void removeTempDir(std::string temp_dir); private: virtual void defineStates(); bool m_OptionsChecked; bool m_DiskChecked; /*! flag to indicate if tempdir should be deleted at end of run */ bool m_created_new_tempdir; MsgSocketHandler *m_SocketHandler; }; /*! A class used to self register engine data classes. */ class APTLIB_API EngineReg { public: /*! Constructor * @param engName The name of the engine to create. */ EngineReg(const std::string &engName); /*! Destructor */ virtual ~EngineReg(); /*! Creates an engine object. * @param name The name of the engine to create. * @return A pointer to the engine. NULL if the failed. */ static BaseEngine *CreateEngine(const std::string &name); /*! Gets the list of all registered engines. * @return The list of registered engines. */ static std::list GetEngineNames(); private: /*! Makes an engine data object. * @return The engine data object. */ virtual BaseEngine *MakeObject() = 0; /*! A pointer to the first registered engine. */ static EngineReg *m_Head; /*! A pointer to the next registered engine. 
*/ EngineReg *m_Next; /*! The name of the engine. */ std::string name; }; #endif /* _BASEENGINE_H_ */ affxparser/src/fusion/util/CalvinChpCheck.h0000644000175200017520000017154314516003651021776 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file CalvinChpCheck.h * @author Chuck Sugnet * @date Tue Apr 25 18:03:02 2006 * * @brief Class for doing a check of two CHP files after regression run. 
*/ #ifndef CALVINCHPCHECK_H #define CALVINCHPCHECK_H // #include "calvin_files/data/src/CHPData.h" #include "calvin_files/data/src/CHPMultiDataData.h" #include "calvin_files/parsers/src/CHPFileReader.h" #include "calvin_files/parsers/src/CHPMultiDataFileReader.h" #include "calvin_files/parsers/src/CHPQuantificationDetectionFileReader.h" #include "calvin_files/parsers/src/CHPQuantificationFileReader.h" #include "calvin_files/utils/src/StringUtils.h" #include "portability/affy-base-types.h" #include "util/AffxByteArray.h" #include "util/Fs.h" #include "util/RegressionCheck.h" #include "util/Util.h" #include "util/Verbose.h" // #include #include #include #include #include #include #include #include using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_data; /** * Class for testing that CHP files are the same +/- some epsilon. Also checks * to make sure that at least some of the headers are the same * (times/dates/versions change so not checked). */ class CalvinChpCheck : public RegressionCheck { public: /** * Constructor. * @param generated - vector of filenames to be tested. * @param gold - matching vector of filenames for gold standard data to test against. * @param diffAllowed - number of differences allowed * @param prefix - argument name prefix in header (default: "apt-") * @param eps - epsilon: Maximum accepted absolute difference in numeric values. * i.e. if |generated-gold| >= eps then there is a difference. * @param bCheckHeaders Check headers? (boolean, default: true). * @param frac - Maximum accepted fractional difference in numeric values (not used by default). * i.e. if |generated-gold| >= frac*max(|generated|,|gold|) then there is a difference. 
*/ CalvinChpCheck( std::vector &generated, std::vector &gold, int diffAllowed=0, const std::wstring &prefix=L"apt-", double eps=0.0001, bool bCheckHeaders = true, double fraction = 0.0 ) { m_Name = "AGCC-CHP-Check"; m_Generated = generated; m_Gold = gold; m_EpsConfidence = eps; m_FracConfidence = fraction; m_EpsQuantification = eps; m_FracQuantification = fraction; m_CheckHeaders = bCheckHeaders; /// @todo allow for a vector of eps for checking extra metrics m_EpsParam = eps; m_FracParam = fraction; m_DiffAllowed = diffAllowed; m_Prefix = prefix; fillInToIgnore(m_IgnoreMap, prefix); // things know to change like user, time, etc. setMaxError(30); } CalvinChpCheck(const std::string &generated, const std::string &gold, int diffAllowed=0, const std::wstring &prefix=L"apt-", double eps=0.0001, bool bCheckHeaders = true, double fraction = 0.0) { m_Generated.push_back(generated); m_Gold.push_back(gold); m_EpsConfidence = eps; m_FracConfidence = fraction; m_EpsQuantification = eps; m_FracQuantification = fraction; m_CheckHeaders = bCheckHeaders; /// @todo allow for a vector of eps for checking extra metrics m_EpsParam = eps; m_FracParam = fraction; m_DiffAllowed = diffAllowed; m_Prefix = prefix; fillInToIgnore(m_IgnoreMap, prefix); // things know to change like user, time, etc. setMaxError(30); } /** * Utility function to set the max number of errors to report * @param max - maximum number of errors to report (-1 for no limit) * @return - void */ void setDiffReportMax(int max) { setMaxError(max); } /** * Check to make sure that two files are the same +/- some epsilon. * @param msg - Fills in an error message if test fails, empty string otherwise. * @return - Returns true if files are close enough, false otherwise. 
*/ bool check(std::string &msg) { bool success = true; std::string current_filename; std::map validDataSets; validDataSets[CopyNumberMultiDataType] = true; validDataSets[GenotypeMultiDataType] = true; validDataSets[ExpressionMultiDataType] = true; validDataSets[CopyNumberVariationMultiDataType] = true; validDataSets[DmetCopyNumberMultiDataType] = true; validDataSets[DmetMultiAllelicMultiDataType] = true; validDataSets[DmetBiAllelicMultiDataType] = true; if(m_Generated.size() != m_Gold.size()) { return checkMsg(false, "CalvinChpCheck::check() - generated and gold vectors must be same size.",msg); } for (size_t i = 0; i < m_Generated.size(); i++) { try { m_Generated[i] = Fs::convertToUncPath(m_Generated[i]); m_Gold[i] = Fs::convertToUncPath(m_Gold[i]); CHPData chp1, chp2; CHPFileReader reader; reader.SetFilename(m_Generated[i]); current_filename=m_Generated[i]; reader.Read(chp1); reader.SetFilename(m_Gold[i]); current_filename=m_Gold[i]; reader.Read(chp2); std::string chp1Type = chp1.GetFileHeader()->GetGenericDataHdr()->GetFileTypeId(); std::string chp2Type = chp2.GetFileHeader()->GetGenericDataHdr()->GetFileTypeId(); success &= checkMsg(chp2Type == chp1Type, "Error: Different CHP Types: " + chp2Type + ", " + chp1Type, msg); if (m_CheckHeaders && !headersSame(chp2, chp1, msg)) success = false; if(chp1Type == CHP_MULTI_DATA_TYPE) { CHPMultiDataData generatedChp, goldChp; CHPMultiDataFileReader chpReader; chpReader.SetFilename(m_Generated[i]); current_filename=m_Generated[i]; chpReader.Read(generatedChp); chpReader.SetFilename(m_Gold[i]); current_filename=m_Gold[i]; chpReader.Read(goldChp); std::map dataSets; std::map::iterator dataIter; dataSets = generatedChp.GetDataSetInfo(); for(dataIter=dataSets.begin(); dataIter != dataSets.end(); dataIter++) { if(validDataSets.find(dataIter->first) == validDataSets.end() || !(validDataSets.find(dataIter->first)->second)){ success &= checkMsg(false, "Error: Unable to check multi data type '" + 
StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataIter->first]) + "'", msg); } } if(!multiDataCopyNumberSame(goldChp, generatedChp, msg, CopyNumberMultiDataType)) { success = false; } if(!multiDataGenotypeSame(goldChp, generatedChp, msg, GenotypeMultiDataType)) { success = false; } if(!multiDataExpressionSame(goldChp, generatedChp, msg, ExpressionMultiDataType)) { success = false; } if(!multiDataCopyNumberVariationSame(goldChp, generatedChp, msg, CopyNumberVariationMultiDataType)) { success = false; } if(!multiDataDmetCopyNumberSame(goldChp, generatedChp, msg, DmetCopyNumberMultiDataType)) { success = false; } if(!multiDataDmetMultiAllelicSame(goldChp, generatedChp, msg, DmetMultiAllelicMultiDataType)) { success = false; } if(!multiDataDmetBiAllelicSame(goldChp, generatedChp, msg, DmetBiAllelicMultiDataType)) { success = false; } } else if(chp1Type == "affymetrix-quantification-analysis") { CHPQuantificationData generatedChp, goldChp; CHPQuantificationFileReader chpReader; chpReader.SetFilename(m_Generated[i]); current_filename=m_Generated[i]; chpReader.Read(generatedChp); chpReader.SetFilename(m_Gold[i]); current_filename=m_Gold[i]; chpReader.Read(goldChp); if(!quantificationDataSame(goldChp, generatedChp, msg)) { success = false; } } else if(chp1Type == "affymetrix-expression-probeset-analysis") { CHPQuantificationDetectionData generatedChp, goldChp; CHPQuantificationDetectionFileReader chpReader; chpReader.SetFilename(m_Generated[i]); current_filename=m_Generated[i]; chpReader.Read(generatedChp); chpReader.SetFilename(m_Gold[i]); current_filename=m_Gold[i]; chpReader.Read(goldChp); if(!quantificationDetectionDataSame(goldChp, generatedChp, msg)) { success = false; } } else if(chp1Type == CHP_GENOTYPING_ASSAY_TYPE) { success &= checkMsg(false, "Error: Unable to compare CHPs of type " + chp1Type, msg); } else if(chp1Type == CHP_UNIVERSAL_ASSAY_TYPE) { success &= checkMsg(false, "Error: Unable to compare CHPs of type " + chp1Type, msg); } else if(chp1Type == 
CHP_RESEQUENCING_ASSAY_TYPE) { success &= checkMsg(false, "Error: Unable to compare CHPs of type " + chp1Type, msg); } else { success &= checkMsg(false, "Error: Unable to compare CHPs of type " + chp1Type, msg); } } // end try catch(Except &e) { success &= checkMsg(false, "Error: " + ToStr(e.what()),msg); } catch(affymetrix_calvin_exceptions::CalvinException &ce) { success &= checkMsg(false, "Error: AGCC library exception: " + FS_QUOTE_PATH(current_filename) + StringUtils::ConvertWCSToMBS(ce.Description()),msg); } catch(const std::exception &e) { success &= checkMsg(false, "Error: standard exception: " + ToStr(e.what()),msg); } catch(...) { success &= checkMsg(false, "Error: Uncaught Exception.",msg); } } // end for return success; } /** * Clear out the default set of header entries to ignore */ void clearHeaderIgnore() { m_IgnoreMap.clear(); } /** * Add a header item to ignore * @param key - reference to a wide string */ void addHeaderIgnore(std::wstring &key) { m_IgnoreMap.insert(key); } private: // Header entries to ignore static void fillInToIgnore(std::set &ignoreMap, const std::wstring &prefix) { ignoreMap.clear(); ignoreMap.insert(L"program-version"); ignoreMap.insert(L"apt-opt-program-version"); ignoreMap.insert(prefix + L"exec-guid"); ignoreMap.insert(prefix + L"analysis-guid"); ignoreMap.insert(prefix + L"time-str"); ignoreMap.insert(prefix + L"free-mem"); ignoreMap.insert(prefix + L"cvs-id"); ignoreMap.insert(prefix + L"version"); ignoreMap.insert(prefix + L"opt-out-dir"); ignoreMap.insert(prefix + L"opt-temp-dir"); ignoreMap.insert(prefix + L"opt-exec-guid"); ignoreMap.insert(prefix + L"state-exec-guid"); ignoreMap.insert(prefix + L"opt-out-dir"); ignoreMap.insert(L"option-program-cvs-id"); ignoreMap.insert(L"option-version-to-report"); ignoreMap.insert(L"option-command-line"); ignoreMap.insert(L"option-exec-guid"); ignoreMap.insert(L"option-run-probeset-genotype"); ignoreMap.insert(L"option-cels"); ignoreMap.insert(L"option-out-dir"); 
ignoreMap.insert(L"option-temp-dir"); ignoreMap.insert(prefix + L"state-time-start"); ignoreMap.insert(prefix + L"state-free-mem-at-start"); ignoreMap.insert(prefix + L"state-analysis-guid"); ignoreMap.insert(prefix + L"opt-program-cvs-id"); ignoreMap.insert(prefix + L"opt-version-to-report"); ignoreMap.insert(prefix + L"opt-command-line"); ignoreMap.insert(prefix + L"state-program-cvs-id"); ignoreMap.insert(prefix + L"state-program-version"); ignoreMap.insert(prefix + L"state-version-to-report"); ignoreMap.insert(prefix + L"state-command-line"); ignoreMap.insert(prefix + L"command-line"); } /** * Check to see if two generic types are equivalent. If floats then * makes sure they are withing epsilon (small number). Otherwise * tested as strings which should work for other types. * @param gold - Type with "correct" values. * @param generated - Type with values to be tested. * @return - true if same, false otherwise. */ bool paramNamValTypeSame(ParameterNameValueType &gold, ParameterNameValueType &generated) { std::string goldName, genName, goldValue, genValue; /* For ease in debugger... */ goldName = StringUtils::ConvertWCSToMBS(gold.GetName()); genName = StringUtils::ConvertWCSToMBS(generated.GetName()); goldValue = StringUtils::ConvertWCSToMBS(gold.ToString()); genValue = StringUtils::ConvertWCSToMBS(generated.ToString()); // bool match = gold.GetName() == generated.GetName(); bool match = true; if(gold.GetParameterType() == ParameterNameValueType::FloatType) { float goldFloat = gold.GetValueFloat(); float genFloat = generated.GetValueFloat(); double maxDiff = 0; checkFloat(goldFloat, genFloat, m_EpsParam, match, maxDiff, false, m_FracParam); } // othewise compare as string. else { // ignore path separators. 
AffxByteArray baGold(StringUtils::ConvertWCSToMBS(gold.ToString())); AffxByteArray baGenerated(StringUtils::ConvertWCSToMBS(generated.ToString())); baGold.replace("\\", "/"); baGenerated.replace("\\", "/"); match &= baGold.toString() == baGenerated.toString(); } return match; } /** * Utility function for checking if two vectors of generic types are the same. * @param gold - Vector with the "correct" values. * @param generated - Vector to be tested against the gold values. * @return - true if same, false othewise. */ bool paramVectorsSame(std::vector &gold, std::vector &generated) { bool match = true; if(gold.size() != generated.size()) return false; for(size_t i = 0; i < gold.size(); i++) { match &= paramNamValTypeSame(gold[i], generated[i]); } return match; } bool ParameterNameValueTypeMostlySame(ParameterNameValueTypeVector &gold, ParameterNameValueTypeVector &test, std::string &msg, std::wstring &prefix) { bool same = true; std::map testMap; std::map::iterator testMapIter; ParameterNameValueTypeVector::iterator goldIter; ParameterNameValueTypeVector::iterator testIter; // Load up test as a map which will be queried by items in gold. for(testIter = test.begin(); testIter != test.end(); ++testIter) { testMap[testIter->GetName()] = *testIter; } for(goldIter = gold.begin(); goldIter != gold.end(); ++goldIter) { // ignore items that are in the ignoreMap if(m_IgnoreMap.find(goldIter->GetName()) == m_IgnoreMap.end()) { testMapIter = testMap.find(goldIter->GetName()); if(testMapIter == testMap.end()) { msg += " Error: Test missing field: '" + StringUtils::ConvertWCSToMBS(goldIter->GetName()) + "'"; same = false; } else { std::string goldName, genName, goldValue, genValue; /* For ease in debugger... 
*/ goldName = StringUtils::ConvertWCSToMBS(goldIter->GetName()); genName = StringUtils::ConvertWCSToMBS(testMapIter->second.GetName()); goldValue = StringUtils::ConvertWCSToMBS(goldIter->ToString()); genValue = StringUtils::ConvertWCSToMBS(testMapIter->second.ToString()); if(!paramNamValTypeSame(testMapIter->second, *goldIter) && Fs::basename(genValue) != Fs::basename(goldValue)) { msg += " Error: for field '" + goldName + "' expecting: '" + goldValue + "' got: '" + genValue + "'"; same = false; } } } } return same; } /** * Are the headers for to CHP files the same? * Ignore some fields known to change like time, run id, etc. * @param goldChp - "correct" chp file. * @param generatedChp - chp file to be tested against gold standard. * @param msgs - ongoing list of messages about what when wrong if anything. * @return - true if headers are equivalent, false otherwise. */ bool headersSame(CHPData &goldChp, CHPData &generatedChp, std::string &msgs) { bool success = true; FileHeader *goldHeader = NULL, *genHeader = NULL; goldHeader = goldChp.GetFileHeader(); genHeader = generatedChp.GetFileHeader(); /* Check the version. */ success &= checkMsg(goldHeader->GetVersion() == genHeader->GetVersion(), "Error: different chp versions.", msgs); /* Check the group numbers. */ success &= checkMsg(goldHeader->GetNumDataGroups() == genHeader->GetNumDataGroups(), "Error: Different group counts.", msgs); /* What type of array chp is from. */ success &= checkMsg(goldChp.GetArrayType() == generatedChp.GetArrayType(), "Error: Different array types. [" + StringUtils::ConvertWCSToMBS(goldChp.GetArrayType()) + " != " + StringUtils::ConvertWCSToMBS(generatedChp.GetArrayType()) + "]", msgs); /* Algorithm used. */ success &= checkMsg(goldChp.GetAlgName() == generatedChp.GetAlgName(), "Error: Different algorithm names.", msgs); /* Algorithm version. 
*/ success &= checkMsg(goldChp.GetAlgVersion() == generatedChp.GetAlgVersion(), "Error: Different algorithm version.", msgs); ParameterNameValueTypeVector goldList = goldChp.GetChipSums(); ParameterNameValueTypeVector genList = generatedChp.GetChipSums(); success &= ParameterNameValueTypeMostlySame(goldList, genList, msgs, m_Prefix); goldList = goldChp.GetAlgParams(); genList = generatedChp.GetAlgParams(); success &= ParameterNameValueTypeMostlySame(goldList, genList, msgs, m_Prefix); return success; } void checkMultiDataDmetCopyNumberEntry(DmetCopyNumberData &goldEntry, DmetCopyNumberData &generatedEntry, bool &localSuccess, double &maxConfDiff, double &maxSignalDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name); } if(goldEntry.call != generatedEntry.call) { localSuccess = false; reportError("Different calls for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.call) + "' test: '" + ToStr(generatedEntry.call) + "'"); } if(goldEntry.force != generatedEntry.force) { localSuccess = false; reportError("Different forced calls for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.force) + "' test: '" + ToStr(generatedEntry.force) + "'"); } if(goldEntry.lower != generatedEntry.lower) { localSuccess = false; reportError("Different lower for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.lower) + "' test: '" + ToStr(generatedEntry.lower) + "'"); } if(goldEntry.upper != generatedEntry.upper) { localSuccess = false; reportError("Different upper for cnv: '" + goldEntry.name + "'. 
gold: '" + ToStr(goldEntry.upper) + "' test: '" + ToStr(generatedEntry.upper) + "'"); } checkFloat(goldEntry.confidence, generatedEntry.confidence, m_EpsConfidence, localSuccess, maxConfDiff, false, m_FracConfidence); checkFloat(goldEntry.estimate, generatedEntry.estimate, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); if(!paramVectorsSame(goldEntry.metrics, generatedEntry.metrics)) { localSuccess = false; reportError("Param vectors different for snp: '" + goldEntry.name + "'."); } if(!localSuccess) { numDiff++; } } void checkMultiDataDmetMultiAllelicEntry(DmetMultiAllelicData &goldEntry, DmetMultiAllelicData &generatedEntry, bool &localSuccess, double &maxConfDiff, double &maxSignalDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name); } if(goldEntry.call != generatedEntry.call) { localSuccess = false; reportError("Different calls for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.call) + "' test: '" + ToStr(generatedEntry.call) + "'"); } if(goldEntry.force != generatedEntry.force) { localSuccess = false; reportError("Different forced calls for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.force) + "' test: '" + ToStr(generatedEntry.force) + "'"); } if(goldEntry.contextA != generatedEntry.contextA) { localSuccess = false; reportError("Different contextA for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.contextA) + "' test: '" + ToStr(generatedEntry.contextA) + "'"); } if(goldEntry.contextB != generatedEntry.contextB) { localSuccess = false; reportError("Different contextB for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.contextB) + "' test: '" + ToStr(generatedEntry.contextB) + "'"); } if(goldEntry.contextC != generatedEntry.contextC) { localSuccess = false; reportError("Different contextC for cnv: '" + goldEntry.name + "'. 
gold: '" + ToStr(goldEntry.contextC) + "' test: '" + ToStr(generatedEntry.contextC) + "'"); } if(goldEntry.contextD != generatedEntry.contextD) { localSuccess = false; reportError("Different contextD for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.contextD) + "' test: '" + ToStr(generatedEntry.contextD) + "'"); } if(goldEntry.contextE != generatedEntry.contextE) { localSuccess = false; reportError("Different contextE for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.contextE) + "' test: '" + ToStr(generatedEntry.contextE) + "'"); } if(goldEntry.contextF != generatedEntry.contextF) { localSuccess = false; reportError("Different contextF for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.contextF) + "' test: '" + ToStr(generatedEntry.contextF) + "'"); } if(goldEntry.alleleCount != generatedEntry.alleleCount) { localSuccess = false; reportError("Different alleleCount for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.alleleCount) + "' test: '" + ToStr(generatedEntry.alleleCount) + "'"); } checkFloat(goldEntry.confidence, generatedEntry.confidence, m_EpsConfidence, localSuccess, maxConfDiff, false, m_FracConfidence); checkFloat(goldEntry.signalA, generatedEntry.signalA, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); checkFloat(goldEntry.signalB, generatedEntry.signalB, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); checkFloat(goldEntry.signalC, generatedEntry.signalC, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); checkFloat(goldEntry.signalD, generatedEntry.signalD, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); checkFloat(goldEntry.signalE, generatedEntry.signalE, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); checkFloat(goldEntry.signalF, generatedEntry.signalF, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); if(!paramVectorsSame(goldEntry.metrics, generatedEntry.metrics)) 
{ localSuccess = false; reportError("Param vectors different for snp: '" + goldEntry.name + "'."); } if(!localSuccess) { numDiff++; } } void checkMultiDataDmetBiAllelicEntry(DmetBiAllelicData &goldEntry, DmetBiAllelicData &generatedEntry, bool &localSuccess, double &maxConfDiff, double &maxSignalDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name); } if(goldEntry.call != generatedEntry.call) { localSuccess = false; reportError("Different calls for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.call) + "' test: '" + ToStr(generatedEntry.call) + "'"); } if(goldEntry.force != generatedEntry.force) { localSuccess = false; reportError("Different forced calls for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.force) + "' test: '" + ToStr(generatedEntry.force) + "'"); } if(goldEntry.contextA != generatedEntry.contextA) { localSuccess = false; reportError("Different contextA for cnv: '" + goldEntry.name + "'. gold: '" + ToStr(goldEntry.contextA) + "' test: '" + ToStr(generatedEntry.contextA) + "'"); } if(goldEntry.contextB != generatedEntry.contextB) { localSuccess = false; reportError("Different contextB for cnv: '" + goldEntry.name + "'. 
gold: '" + ToStr(goldEntry.contextB) + "' test: '" + ToStr(generatedEntry.contextB) + "'"); } checkFloat(goldEntry.confidence, generatedEntry.confidence, m_EpsConfidence, localSuccess, maxConfDiff, false, m_FracConfidence); checkFloat(goldEntry.signalA, generatedEntry.signalA, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); checkFloat(goldEntry.signalB, generatedEntry.signalB, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); if(!paramVectorsSame(goldEntry.metrics, generatedEntry.metrics)) { localSuccess = false; reportError("Param vectors different for snp: '" + goldEntry.name + "'."); } if(!localSuccess) { numDiff++; } } /** * Determine if two gentoyping chp file entries are equivalent. * @param goldEntry - "correct" entry. * @param generatedEntry - entry to be tested against gold standard. * @param localSuccess - set to to false if entries not equivalent. * @param maxDiff - maximum difference seen so far. * @param numDiff - number of differences seen so far, incremented if these entries not equivalent. */ void checkMultiDataGenotypeEntry(ProbeSetMultiDataGenotypeData &goldEntry, ProbeSetMultiDataGenotypeData &generatedEntry, bool &localSuccess, double &maxDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name); } if(goldEntry.call != generatedEntry.call) { localSuccess = false; reportError("Different calls for snp: '" + goldEntry.name + "'. 
gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } if(!checkFloat(goldEntry.confidence, generatedEntry.confidence, m_EpsConfidence, localSuccess, maxDiff, false, m_FracConfidence)) { reportError("Confidence is different for snp: '" + goldEntry.name + "'."); } if(!paramVectorsSame(goldEntry.metrics, generatedEntry.metrics)) { localSuccess = false; reportError("Param vectors different for snp: '" + goldEntry.name + "'."); } if(!localSuccess) { numDiff++; } } /** * Determine if two expression chp file entries are equivalent. * @param goldEntry - "correct" entry. * @param generatedEntry - entry to be tested against gold standard. * @param localSuccess - set to to false if entries not equivalent. * @param maxDiff - maximum difference seen so far. * @param numDiff - number of differences seen so far, incremented if these entries not equivalent. */ void checkMultiDataExpressionEntry(ProbeSetMultiDataExpressionData &goldEntry, ProbeSetMultiDataExpressionData &generatedEntry, bool &localSuccess, double &maxDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } checkFloat(goldEntry.quantification, generatedEntry.quantification, m_EpsQuantification, localSuccess, maxDiff, false, m_FracQuantification); if(!paramVectorsSame(goldEntry.metrics, generatedEntry.metrics)) { localSuccess = false; reportError("Param vectors different for probeset: '" + goldEntry.name + "'."); } if(!localSuccess) { numDiff++; } } /** * Determine if two copnumber chp file entries are equivalent. * @param goldEntry - "correct" entry. * @param generatedEntry - entry to be tested against gold standard. * @param localSuccess - set to to false if entries not equivalent. * @param maxLog2RatioDiff - maximum difference seen so far. * @param maxSmoothSignalDiff - maximum difference seen so far. 
* @param maxAllelicDifferenceDiff - maximum difference seen so far. * @param numDiff - number of differences seen so far, incremented if these entries not equivalent. */ void checkMultiDataCopyNumberEntry(ProbeSetMultiDataCopyNumberData &goldEntry, ProbeSetMultiDataCopyNumberData &generatedEntry, bool &localSuccess, double &maxLog2RatioDiff, double &maxSmoothSignalDiff, double &maxAllelicDifferenceDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name); } if(goldEntry.chr != generatedEntry.chr) { localSuccess = false; reportError("Different Chromosome for snp: '" + goldEntry.name + "'. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } if(goldEntry.position != generatedEntry.position) { localSuccess = false; reportError("Different Position for snp: '" + goldEntry.name + "'. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } if (goldEntry.metrics.at(0).GetValueFloat() != generatedEntry.metrics.at(0).GetValueFloat()) { localSuccess = false; reportError("Different CNState for snp: '" + goldEntry.name + "'. 
gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } if (localSuccess) checkFloat(goldEntry.metrics.at(1).GetValueFloat(), generatedEntry.metrics.at(1).GetValueFloat(), m_EpsConfidence, localSuccess, maxLog2RatioDiff, false, m_FracConfidence); if (localSuccess) checkFloat(goldEntry.metrics.at(2).GetValueFloat(), generatedEntry.metrics.at(2).GetValueFloat(), m_EpsConfidence, localSuccess, maxSmoothSignalDiff, false, m_FracConfidence); if (localSuccess) checkFloat(goldEntry.metrics.at(4).GetValueFloat(), generatedEntry.metrics.at(4).GetValueFloat(), m_EpsConfidence, localSuccess, maxAllelicDifferenceDiff, false, m_FracConfidence); if ((goldEntry.metrics.at(3).GetValueFloat() == 0) || (goldEntry.metrics.at(3).GetValueFloat() == 1) || (generatedEntry.metrics.at(3).GetValueFloat() == 0) || (generatedEntry.metrics.at(3).GetValueFloat() == 1)) { if (goldEntry.metrics.at(3).GetValueFloat() != generatedEntry.metrics.at(3).GetValueFloat()) { localSuccess = false; reportError("Different LOH for snp: '" + goldEntry.name + "'. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } } if (!localSuccess) numDiff++; } /** * Determine if two cnv chp file entries are equivalent. * @param goldEntry - "correct" entry. * @param generatedEntry - entry to be tested against gold standard. * @param localSuccess - set to to false if entries not equivalent. * @param maxSignalDiff - maximum difference seen so far. * @param maxConfidenceDiff - maximum difference seen so far. * @param numDiff - number of differences seen so far, incremented if these entries not equivalent. */ void checkMultiDataCopyNumberVariationEntry(ProbeSetMultiDataCopyNumberVariationRegionData &goldEntry, ProbeSetMultiDataCopyNumberVariationRegionData &generatedEntry, bool &localSuccess, double &maxSignalDiff, double &maxConfidenceDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. 
gold: '" + goldEntry.name + "' test: '" + generatedEntry.name); } if(goldEntry.call != generatedEntry.call) { localSuccess = false; reportError("Different calls. gold: '" + ToStr(goldEntry.call) + "' test: '" + ToStr(generatedEntry.call)); } if (localSuccess) { checkFloat(goldEntry.signal, generatedEntry.signal, m_EpsConfidence, localSuccess, maxSignalDiff, false, m_FracConfidence); if(!localSuccess) reportError("Different signals. gold: '" + ToStr(goldEntry.signal) + "' test: '" + ToStr(generatedEntry.signal)); } if (localSuccess){ checkFloat(goldEntry.confidenceScore, generatedEntry.confidenceScore, m_EpsConfidence, localSuccess, maxConfidenceDiff, false, m_FracConfidence); if(!localSuccess) reportError("Different confidences. gold: '" + ToStr(goldEntry.confidenceScore) + "' test: '" + ToStr(generatedEntry.confidenceScore)); } if (!localSuccess) numDiff++; } /** * Determine if two expression chp file entries are equivalent. * @param goldEntry - "correct" entry. * @param generatedEntry - entry to be tested against gold standard. * @param localSuccess - set to to false if entries not equivalent. * @param maxDiff - maximum difference seen so far. * @param numDiff - number of differences seen so far, incremented if these entries not equivalent. */ void checkQuantificationEntry(ProbeSetQuantificationData &goldEntry, ProbeSetQuantificationData &generatedEntry, bool &localSuccess, double &maxDiff, int &numDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } if(goldEntry.id != generatedEntry.id) { localSuccess = false; reportError("Different ids. 
gold: '" + ToStr(goldEntry.id) + "' test: '" + ToStr(generatedEntry.id) + "'"); } checkFloat(goldEntry.quantification, generatedEntry.quantification, m_EpsQuantification, localSuccess, maxDiff, false, m_FracQuantification); if(!localSuccess) { numDiff++; } } void checkQuantificationDetectionEntry(ProbeSetQuantificationDetectionData &goldEntry, ProbeSetQuantificationDetectionData &generatedEntry, bool &localSuccess, double &maxDiff, int &numDiff, double &pvalMaxDiff) { if(goldEntry.name != generatedEntry.name) { localSuccess = false; reportError("Different names. gold: '" + goldEntry.name + "' test: '" + generatedEntry.name + "'"); } if(goldEntry.id != generatedEntry.id) { localSuccess = false; reportError("Different ids. gold: '" + ToStr(goldEntry.id) + "' test: '" + ToStr(generatedEntry.id) + "'"); } checkFloat(goldEntry.quantification, generatedEntry.quantification, m_EpsQuantification, localSuccess, maxDiff, false, m_FracQuantification); checkFloat(goldEntry.pvalue, generatedEntry.pvalue, m_EpsConfidence, localSuccess, pvalMaxDiff, false, m_FracConfidence); if(!localSuccess) { numDiff++; } } bool multiDataDmetCopyNumberSame(CHPMultiDataData &goldChp, CHPMultiDataData &generatedChp, std::string &msgs, MultiDataType dataType) { bool success = true; double maxDiff = -1; double maxSignalDiff = -1; int numDiff = 0; int goldNum = goldChp.GetEntryCount(dataType); int genNum = generatedChp.GetEntryCount(dataType); assert(dataType == DmetCopyNumberMultiDataType); Verbose::out(4, ToStr(goldNum) + " " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + " probesets."); if(!checkMsg(goldNum == genNum, "Wrong number of genotyping probesets.", msgs)) return false; // if nothing to check, skip it. 
if(goldNum == 0) return true; for(int i = 0; i < goldNum; i++) { bool localSuccess = true; DmetCopyNumberData goldEntry, generatedEntry; goldChp.GetEntry(dataType, i, goldEntry); generatedChp.GetEntry(dataType, i, generatedEntry); checkMultiDataDmetCopyNumberEntry(goldEntry, generatedEntry, localSuccess, maxDiff, maxSignalDiff, numDiff); success &= localSuccess; } if(maxDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [confidence]"); if(maxSignalDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxSignalDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [signal]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(goldNum) + " (" + ToStr(100.0 * numDiff/goldNum) + "%) were different."); if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". "; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": checked " + ToStr(goldNum) + " genotype entries."); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max confidence diff is: " + ToStr(maxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max signal diff is: " + ToStr(maxSignalDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": chip is " + res + "."); return success; } bool multiDataDmetMultiAllelicSame(CHPMultiDataData &goldChp, CHPMultiDataData &generatedChp, std::string &msgs, MultiDataType dataType) { bool success = true; double maxDiff = -1; double maxSignalDiff = -1; int numDiff = 0; int goldNum = goldChp.GetEntryCount(dataType); int 
genNum = generatedChp.GetEntryCount(dataType); assert(dataType == DmetMultiAllelicMultiDataType); Verbose::out(4, ToStr(goldNum) + " " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + " probesets."); if(!checkMsg(goldNum == genNum, "Wrong number of genotyping probesets.", msgs)) return false; // if nothing to check, skip it. if(goldNum == 0) return true; for(int i = 0; i < goldNum; i++) { bool localSuccess = true; DmetMultiAllelicData goldEntry, generatedEntry; goldChp.GetEntry(dataType, i, goldEntry); generatedChp.GetEntry(dataType, i, generatedEntry); checkMultiDataDmetMultiAllelicEntry(goldEntry, generatedEntry, localSuccess, maxDiff, maxSignalDiff, numDiff); success &= localSuccess; } if(maxDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [confidence]"); if(maxSignalDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxSignalDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [signal]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(goldNum) + " (" + ToStr(100.0 * numDiff/goldNum) + "%) were different."); if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". 
"; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": checked " + ToStr(goldNum) + " genotype entries."); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max confidence diff is: " + ToStr(maxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max signal diff is: " + ToStr(maxSignalDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": chip is " + res + "."); return success; } bool multiDataDmetBiAllelicSame(CHPMultiDataData &goldChp, CHPMultiDataData &generatedChp, std::string &msgs, MultiDataType dataType) { bool success = true; double maxDiff = -1; double maxSignalDiff = -1; int numDiff = 0; int goldNum = goldChp.GetEntryCount(dataType); int genNum = generatedChp.GetEntryCount(dataType); assert(dataType == DmetBiAllelicMultiDataType); Verbose::out(4, ToStr(goldNum) + " " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + " probesets."); if(!checkMsg(goldNum == genNum, "Wrong number of genotyping probesets.", msgs)) return false; // if nothing to check, skip it. 
if(goldNum == 0) return true; for(int i = 0; i < goldNum; i++) { bool localSuccess = true; DmetBiAllelicData goldEntry, generatedEntry; goldChp.GetEntry(dataType, i, goldEntry); generatedChp.GetEntry(dataType, i, generatedEntry); checkMultiDataDmetBiAllelicEntry(goldEntry, generatedEntry, localSuccess, maxDiff, maxSignalDiff, numDiff); success &= localSuccess; } if(maxDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [confidence]"); if(maxSignalDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxSignalDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [signal]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(goldNum) + " (" + ToStr(100.0 * numDiff/goldNum) + "%) were different."); if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". "; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": checked " + ToStr(goldNum) + " genotype entries."); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max confidence diff is: " + ToStr(maxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max signal diff is: " + ToStr(maxSignalDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": chip is " + res + "."); return success; } /** * Check to see if the copynumber entries for MultiDataType are the same. * @param goldChp - "correct" data. * @param generatedChp - chp file data to test against gold standard. * @param msgs - any ongoing messages for user. 
* @param dataType - Either genotype or control genotype, they are conceptually separate in chp file. * @return - true if genotype entries are equivalent, false otherwise. */ bool multiDataCopyNumberSame(CHPMultiDataData &goldChp, CHPMultiDataData &generatedChp, std::string &msgs, MultiDataType dataType) { bool success = true; double maxLog2RatioDiff = -1; double maxSmoothSignalDiff = -1; double maxAllelicDifferenceDiff = -1; int numDiff = 0; int numGenotyping = goldChp.GetEntryCount(dataType); int genNum = generatedChp.GetEntryCount(dataType); assert(dataType == CopyNumberMultiDataType); Verbose::out(4, ToStr(numGenotyping) + " " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + " copynumber probesets."); if(!checkMsg(numGenotyping == genNum, "Wrong number of copynumber probesets.", msgs)) return false; // if nothing to check, skip it. if(numGenotyping == 0) return true; for(int i = 0; i < numGenotyping; i++) { bool localSuccess = true; ProbeSetMultiDataCopyNumberData goldEntry, generatedEntry; goldChp.GetCopyNumberEntry(dataType, i, goldEntry); generatedChp.GetCopyNumberEntry(dataType, i, generatedEntry); checkMultiDataCopyNumberEntry(goldEntry, generatedEntry, localSuccess, maxLog2RatioDiff, maxSmoothSignalDiff, maxAllelicDifferenceDiff, numDiff); success &= localSuccess; } if(maxLog2RatioDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxLog2RatioDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [Log2Ratio]"); if(maxSmoothSignalDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxSmoothSignalDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [SmoothSignal]"); if(maxLog2RatioDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxAllelicDifferenceDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [AllelicDifference]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(numGenotyping) + " (" + ToStr(100.0 * numDiff/numGenotyping) + "%) were 
different."); if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". "; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": checked " + ToStr(numGenotyping) + " copynumber entries."); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max Log2Ratio diff is: " + ToStr(maxLog2RatioDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max SmoothSignal diff is: " + ToStr(maxSmoothSignalDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max AlleleicDifference diff is: " + ToStr(maxAllelicDifferenceDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": chip is " + res + "."); return success; } /** * Check to see if the copynumber entries for MultiDataType are the same. * @param goldChp - "correct" data. * @param generatedChp - chp file data to test against gold standard. * @param msgs - any ongoing messages for user. * @param dataType - Either genotype or control genotype, they are conceptually separate in chp file. * @return - true if genotype entries are equivalent, false otherwise. 
*/ bool multiDataCopyNumberVariationSame(CHPMultiDataData &goldChp, CHPMultiDataData &generatedChp, std::string &msgs, MultiDataType dataType) { bool success = true; int numDiff = 0; double maxConfidenceDiff = -1; double maxSignalDiff = -1; int goldNum = goldChp.GetEntryCount(dataType); int genNum = generatedChp.GetEntryCount(dataType); assert(dataType == CopyNumberVariationMultiDataType); Verbose::out(4, ToStr(goldNum) + " " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + " CNV probesets."); if(!checkMsg(goldNum == genNum, "Wrong number of CNV probesets.", msgs)) return false; // if nothing to check, skip it. if(goldNum == 0) return true; for(int i = 0; i < goldNum; i++) { bool localSuccess = true; ProbeSetMultiDataCopyNumberVariationRegionData goldEntry, generatedEntry; goldChp.GetCopyNumberVariationEntry(dataType, i, goldEntry); generatedChp.GetCopyNumberVariationEntry(dataType, i, generatedEntry); checkMultiDataCopyNumberVariationEntry(goldEntry, generatedEntry, localSuccess, maxSignalDiff, maxConfidenceDiff, numDiff); success &= localSuccess; } if(maxSignalDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxSignalDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [signal]"); if(maxConfidenceDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxConfidenceDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [confidence]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(goldNum) + " (" + ToStr(100.0 * numDiff/goldNum) + "%) were different."); if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". 
"; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": checked " + ToStr(goldNum) + " CNV entries."); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max Signal diff is: " + ToStr(maxSignalDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max Confidence diff is: " + ToStr(maxConfidenceDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": chip is " + res + "."); return success; } /** * Check to see if the genotype entries for MultiDataType are the same. * @param goldChp - "correct" data. * @param generatedChp - chp file data to test against gold standard. * @param msgs - any ongoing messages for user. * @param dataType - Either genotype or control genotype, they are conceptually separate in chp file. * @return - true if genotype entries are equivalent, false otherwise. */ bool multiDataGenotypeSame(CHPMultiDataData &goldChp, CHPMultiDataData &generatedChp, std::string &msgs, MultiDataType dataType) { bool success = true; double maxDiff = -1; int numDiff = 0; int numGenotyping = goldChp.GetEntryCount(dataType); int genNum = generatedChp.GetEntryCount(dataType); assert(dataType == GenotypeMultiDataType); Verbose::out(4, ToStr(numGenotyping) + " " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + " genotyping probesets."); if(!checkMsg(numGenotyping == genNum, "Wrong number of genotyping probesets.", msgs)) return false; // if nothing to check, skip it. 
if(numGenotyping == 0) return true; for(int i = 0; i < numGenotyping; i++) { bool localSuccess = true; ProbeSetMultiDataGenotypeData goldEntry, generatedEntry; goldChp.GetGenotypeEntry(dataType, i, goldEntry); generatedChp.GetGenotypeEntry(dataType, i, generatedEntry); checkMultiDataGenotypeEntry(goldEntry, generatedEntry, localSuccess, maxDiff, numDiff); success &= localSuccess; } if(maxDiff > m_EpsConfidence) Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_EpsConfidence) + ") [confidence]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(numGenotyping) + " (" + ToStr(100.0 * numDiff/numGenotyping) + "%) were different."); if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". "; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": checked " + ToStr(numGenotyping) + " genotype entries."); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max confidence diff is: " + ToStr(maxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": chip is " + res + "."); return success; } /** * Check to see if the expression entries for MultiDataType are the same. * @param goldChp - "correct" data. * @param generatedChp - chp file data to test against gold standard. * @param msgs - any ongoing messages for user. * @param dataType - Either expression or control expression, they are conceptually separate in chp file. * @return - true if expression entries are equivalent, false otherwise. 
*/ bool multiDataExpressionSame(CHPMultiDataData &goldChp, CHPMultiDataData &generatedChp, std::string &msgs, MultiDataType dataType) { bool success = true; double maxDiff = -1; int numDiff = 0; assert(dataType == ExpressionMultiDataType); int numExpr = goldChp.GetEntryCount(dataType); int genNum = generatedChp.GetEntryCount(dataType); Verbose::out(4, ToStr(numExpr) + " " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + " expression probesets."); if(!checkMsg(numExpr == genNum, "Wrong number of expression probesets.", msgs)) return false; // if nothing to check, skip it. if(numExpr == 0) return true; for(int i = 0; i < numExpr; i++) { bool localSuccess = true; ProbeSetMultiDataExpressionData goldEntry, generatedEntry; goldChp.GetExpressionEntry(dataType, i, goldEntry); generatedChp.GetExpressionEntry(dataType, i, generatedEntry); checkMultiDataExpressionEntry(goldEntry, generatedEntry, localSuccess, maxDiff, numDiff); success &= localSuccess; } if(maxDiff > m_EpsQuantification) Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_EpsQuantification) + ") [quantification]"); if(numDiff > 0 ) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(numExpr) + " (" + ToStr(100.0 * numDiff/numExpr) + "%) were different."); if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". 
"; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": checked " + ToStr(numExpr) + " expression entries."); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": max signal diff is: " + ToStr(maxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": " + StringUtils::ConvertWCSToMBS(MultiDataDataSetNames[dataType]) + ": chip is " + res + "."); return success; } /** * Check to see if the expression entries for Quantification are the same. * @param goldChp - "correct" data. * @param generatedChp - chp file data to test against gold standard. * @param msgs - any ongoing messages for user. * @return - true if expression entries are equivalent, false otherwise. */ bool quantificationDataSame(CHPQuantificationData &goldChp, CHPQuantificationData &generatedChp, std::string &msgs) { bool success = true; double maxDiff = -1; int numDiff = 0; int numExpr = goldChp.GetEntryCount(); int genNum = generatedChp.GetEntryCount(); Verbose::out(4, ToStr(numExpr) + " expression probesets."); if(!checkMsg(numExpr == genNum, "Wrong number of expression probesets.", msgs)) return false; // if nothing to check, skip it. 
if(numExpr == 0) return true; for(int i = 0; i < numExpr; i++) { bool localSuccess = true; ProbeSetQuantificationData goldEntry, generatedEntry; goldChp.GetQuantificationEntry(i, goldEntry); generatedChp.GetQuantificationEntry(i, generatedEntry); checkQuantificationEntry(goldEntry, generatedEntry, localSuccess, maxDiff, numDiff); success &= localSuccess; } if(maxDiff > m_EpsQuantification) Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_EpsQuantification) + ") [quantification]"); if(numDiff > 0 ) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(numExpr) + " (" + ToStr(100.0 * numDiff/numExpr) + "%) were different."); if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". "; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": checked " + ToStr(numExpr) + " quantification entries."); Verbose::out(1, generatedChp.GetFilename() + ": max signal diff is: " + ToStr(maxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": chip is " + res + "."); return success; } /** * Check to see if the expression entries for Quantification are the same. * @param goldChp - "correct" data. * @param generatedChp - chp file data to test against gold standard. * @param msgs - any ongoing messages for user. * @return - true if expression entries are equivalent, false otherwise. */ bool quantificationDetectionDataSame(CHPQuantificationDetectionData &goldChp, CHPQuantificationDetectionData &generatedChp, std::string &msgs) { bool success = true; double maxDiff = -1; double pvalMaxDiff = -1; int numDiff = 0; int numExpr = goldChp.GetEntryCount(); int genNum = generatedChp.GetEntryCount(); Verbose::out(4, ToStr(numExpr) + " expression probesets."); if(!checkMsg(numExpr == genNum, "Wrong number of expression probesets.", msgs)) return false; // if nothing to check, skip it. 
if(numExpr == 0) return true; for(int i = 0; i < numExpr; i++) { bool localSuccess = true; ProbeSetQuantificationDetectionData goldEntry, generatedEntry; goldChp.GetQuantificationDetectionEntry(i, goldEntry); generatedChp.GetQuantificationDetectionEntry(i, generatedEntry); checkQuantificationDetectionEntry(goldEntry, generatedEntry, localSuccess, maxDiff, numDiff, pvalMaxDiff); success &= localSuccess; } if(maxDiff > m_EpsQuantification) Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_EpsQuantification) + ") [quantification]"); if(numDiff > 0 ) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(numExpr) + " (" + ToStr(100.0 * numDiff/numExpr) + "%) were different."); if(!success) { msgs += "Error: " + generatedChp.GetFilename() + " is different from " + goldChp.GetFilename() + ". "; } std::string res = "different"; if(success) res = "equivalent"; Verbose::out(1, generatedChp.GetFilename() + ": checked " + ToStr(numExpr) + " quantification entries."); Verbose::out(1, generatedChp.GetFilename() + ": max signal diff is: " + ToStr(maxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": max pval diff is: " + ToStr(pvalMaxDiff)); Verbose::out(1, generatedChp.GetFilename() + ": chip is " + res + "."); return success; } /// Filenames for the gold standard or correct data chp files. std::vector m_Gold; /// Matching filenames for the chp files to be tested. std::vector m_Generated; /// Epsilon, small value that two floats can differ by but still be considered equivalent. double m_EpsConfidence; double m_FracConfidence; double m_EpsQuantification; double m_FracQuantification; double m_EpsParam; double m_FracParam; bool m_CheckHeaders; /// How many differences will we tolerate? int m_DiffAllowed; /// What is the expected prefix for parameter names? 
std::wstring m_Prefix; /// Header entries to ignore std::set m_IgnoreMap; }; #endif /* CALVINCHPCHECK_H */ affxparser/src/fusion/util/CalvinToText.cpp0000644000175200017520000003520614516003651022103 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 1989, 1991 Free Software Foundation, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "util/CalvinToText.h" // #include "broadutil/BroadException.h" #include "calvin_files/exception/src/ExceptionBase.h" #include "util/Fs.h" #include "util/Util.h" using namespace std; using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; // Constructor. CalvinToTextFile::CalvinToTextFile() { m_cb = NULL; m_pfileStream = NULL; } // Destructor. CalvinToTextFile::~CalvinToTextFile() { if (isOpen()) { m_pfileStream->close(); delete m_pfileStream; } if (m_cb != NULL) {delete[] m_cb;} } bool CalvinToTextFile::isOpen() { return ((m_pfileStream != NULL) && (m_pfileStream->is_open())); } void CalvinToTextFile::close(void) { if (isOpen()) { m_pfileStream->close(); delete m_pfileStream; m_pfileStream = NULL; if (m_cb != NULL) {delete[] m_cb; m_cb = NULL;} } } // Open the file. 
bool CalvinToTextFile::open(const AffxString& strFileName ) { m_pfileStream = new std::fstream; Fs::aptOpen(*m_pfileStream, strFileName, fstream::out | fstream::binary | fstream::trunc); if (!isOpen()) { delete m_pfileStream; m_pfileStream = NULL; } return isOpen(); } void CalvinToTextFile::write(const char *psz) { if (m_pfileStream != NULL) { m_pfileStream->write(psz, (std::streamsize)strlen(psz)); } } void CalvinToTextFile::writeLine(const AffxString& str) { std::ostringstream line; line << str << std::endl; write(line.str().c_str()); } /** * Error handling for the class. * @param strMessage - The error messagae to report. */ void CalvinToText::error(const AffxString& strMessage) { Err::errAbort(strMessage); } void CalvinToText::run(const std::string& strFileNameIn, const std::string& strFileNameOut, bool bHeader, bool bBody, bool bNewGuid) { try { m_strFileNameIn = Fs::convertToUncPath(strFileNameIn); m_strFileNameOut = Fs::convertToUncPath(strFileNameOut); if (isCalvinFile(m_strFileNameIn)) { AffxByteArray chpFileName; chpFileName.assign(m_strFileNameIn); Verbose::out(1, "Writing file: " + m_strFileNameOut); if (m_file.open(m_strFileNameOut) ){ m_bHeader = bHeader; affymetrix_calvin_io::GenericData genericData; OutputHeader(m_strFileNameIn, genericData, bNewGuid); if (bBody){ OutputGroupsAndSets(genericData); } m_file.close(); } else { Err::errAbort("A problem occurred while processing file (Cannot open output file) " + m_strFileNameIn); } } } //When things go wrong see if we can die gracefully here. catch(Except &e) { Verbose::out(0,""); error("Exception caught. " "Message is: " + ToStr(e.what())); } catch(const std::bad_alloc &e) { Verbose::out(0,""); error("std::bad_alloc caught. " "The application has run out of memory, or the calvin file is malformed." "Message is: " + ToStr(e.what())); } catch(affymetrix_calvin_exceptions::CalvinException &ce) { Verbose::out(0,""); error("affymetrix_calvin_exceptions::CalvinException caught. 
" "Affymetrix GeneChip Command Console library has thrown an exception. " "Message is: " + affymetrix_calvin_utilities::StringUtils::ConvertWCSToMBS(ce.Description())); } catch(BroadException &e) { Verbose::out(0,""); error("BroadException caught. " "Message is: '" + ToStr(e.m_msg) + "' source file: '" + ToStr(e.m_sourcefile) + ":" + ToStr(e.m_sourceline) + "'"); } catch(const std::exception &e) { Verbose::out(0,""); error("std::exception caught. " "Message is: " + ToStr(e.what())); } catch(...) { Verbose::out(0,""); error("Unknown exception caught. " "No message is available."); } } /* * Read the file into a data object. */ bool CalvinToText::ReadFile(const string &fileName, GenericData &gdata) { GenericFileReader reader; // Read the file. reader.SetFilename(fileName); try { reader.ReadHeader(gdata); } catch(...) { return false; } return true; } /* * Output the data to the command line. */ void CalvinToText::OutputData(GenericData& gdata, int groupIndex, int setIndex, int startRow, int endRow) { DataSet *set = gdata.DataSet(groupIndex, setIndex); set->Open(); // Output the rows of data int8_t i8; u_int8_t iu8; int16_t i16; u_int16_t iu16; int32_t i32; u_int32_t iu32; float f32; std::string s; std::wstring ws; int nRows = set->Rows(); int nCols = set->Cols(); startRow = Min(Max(startRow, 0), nRows-1); if (startRow < 0) {startRow = 0;} if (endRow < 0) {endRow = nRows-1;} endRow = Min(endRow, nRows-1); AffxString strColumnName; int iDetectionColumn = -1; int iGenotypeCallColumn = -1; int iForcedGenotypeCallColumn = -1; int iChromosomeColumn = -1; if (nCols > 0) { for (int col=0; colHeader().GetColumnInfo(col).GetName()).c_str(); if (strColumnName == "Detection") {iDetectionColumn = col;} else if (strColumnName == "Call") {iGenotypeCallColumn = col;} else if (strColumnName == "Forced Call") {iForcedGenotypeCallColumn = col;} else if (strColumnName == "Chromosome") {iChromosomeColumn = col;} m_file.write(strColumnName.c_str()); } m_file.writeLine(""); } for (int 
row=startRow; row<=endRow; row++) { for (int col=0; colHeader().GetColumnInfo(col).GetColumnType()) { case ByteColType: set->GetData(row, col, i8); m_file.write(::getInt((int)i8).c_str()); break; case UByteColType: set->GetData(row, col, iu8); if (col == iDetectionColumn) { switch(iu8) { case 0: m_file.write("P"); break; case 1: m_file.write("M"); break; case 2: m_file.write("A"); break; case 3: m_file.write("N"); break; default: m_file.write(::getInt((int)iu8).c_str()); } } else if ((col == iGenotypeCallColumn) || (col == iForcedGenotypeCallColumn)) { switch(iu8) { case 6: m_file.write("AA"); break; case 8: m_file.write("AB"); break; case 7: m_file.write("BB"); break; case 11: m_file.write("NC"); break; default: m_file.write(::getInt((int)iu8).c_str()); } } else if (col == iChromosomeColumn) { if ((m_iXChromosome != -1) && (m_iXChromosome == iu8)) {m_file.write("X");} else if ((m_iXChromosome != -1) && (m_iYChromosome == iu8)) {m_file.write("Y");} else {m_file.write(::getInt((int)iu8).c_str());} } else {m_file.write(::getInt((int)iu8).c_str());} break; case ShortColType: set->GetData(row, col, i16); m_file.write(::getInt(i16).c_str()); break; case UShortColType: set->GetData(row, col, iu16); m_file.write(::getInt(iu16).c_str()); break; case IntColType: set->GetData(row, col, i32); m_file.write(::getInt(i32).c_str()); break; case UIntColType: set->GetData(row, col, iu32); m_file.write(::getUnsignedInt(iu32).c_str()); break; case FloatColType: set->GetData(row, col, f32); m_file.write(::getDouble(f32, 6).c_str()); break; case ASCIICharColType: set->GetData(row, col, s); m_file.write(s.c_str()); break; case UnicodeCharColType: set->GetData(row, col, ws); m_file.write(StringUtils::ConvertWCSToMBS(ws).c_str()); break; default: break; } } m_file.writeLine(""); } set->Close(); set->Delete(); } /* * Output the file header parameters. 
*/ void CalvinToText::OutputFileHeaderParameters(GenericData &gdata, bool bNewGuid) { // cout << "#File Header Information" << endl; if (m_bHeader) { if (bNewGuid) { m_file.writeLine("#%File=" + m_strFileNameOut); m_file.writeLine("#%FileCreationTime=" + StringUtils::ConvertWCSToMBS(DateTime::GetCurrentDateTime().ToString())); } else { m_file.writeLine("#%File=" + m_strFileNameIn); m_file.writeLine("#%FileSize=" + ::getUnsignedInt(Fs::fileSize(m_strFileNameIn))); if (gdata.Header().GetGenericDataHdr()->GetFileCreationTime().length() > 0) { m_file.writeLine("#%FileCreationTime=" + StringUtils::ConvertWCSToMBS(gdata.Header().GetGenericDataHdr()->GetFileCreationTime())); } // Output the magic and version numbers. m_file.writeLine("#%Magic=" + ::getInt((int)gdata.Header().GetMagicNumber())); m_file.writeLine("#%Version=" + ::getInt((int)gdata.Header().GetVersion())); } } } /* * Output the header parameters. */ void CalvinToText::OutputParameters(ParameterNameValueTypeIt &begin, ParameterNameValueTypeIt &end) { ParameterNameValueType param; ParameterNameValueTypeIt it; for (it=begin; it!=end; ++it) { param = *it; if (m_bHeader) { m_file.writeLine("#%" + StringUtils::ConvertWCSToMBS(param.GetName()) + "=" + StringUtils::ConvertWCSToMBS(param.ToString())); } if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-xChromosome") {m_iXChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-yChromosome") {m_iYChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-option-xChromosome") {m_iXChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-option-yChromosome") {m_iYChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} } } /* * Output the 
header parameters. */ void CalvinToText::OutputParameters(ParameterNameValueTypeConstIt &begin, ParameterNameValueTypeConstIt &end) { ParameterNameValueType param; ParameterNameValueTypeConstIt it; for (it=begin; it!=end; ++it) { param = *it; if (m_bHeader) { m_file.writeLine("#%" + StringUtils::ConvertWCSToMBS(param.GetName()) + "=" +StringUtils::ConvertWCSToMBS(param.ToString())); } if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-xChromosome") {m_iXChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-yChromosome") {m_iYChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-option-xChromosome") {m_iXChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} if (StringUtils::ConvertWCSToMBS(param.GetName()) == "affymetrix-algorithm-param-option-yChromosome") {m_iYChromosome = ::getInt(StringUtils::ConvertWCSToMBS(param.ToString()));} } } /* * Output the data header parameters. */ void CalvinToText::OutputDataHeaderParameters(GenericDataHeader *hdr, bool bNewGuid) { // cout << "#Data Header Information" << endl; if (m_bHeader) { // Add the file id's. if (hdr->GetFileId().length() > 0) { if (bNewGuid) { m_file.writeLine("#%FileIdentifier=" + affxutil::Guid::GenerateNewGuid()); } else { m_file.writeLine("#%FileIdentifier=" + hdr->GetFileId()); } } if (hdr->GetFileTypeId().length() > 0) { m_file.writeLine("#%FileTypeIdentifier=" + hdr->GetFileTypeId()); } if (hdr->GetLocale().length() > 0) { m_file.writeLine("#%FileLocale=" + StringUtils::ConvertWCSToMBS(hdr->GetLocale())); } } // Add the parameters. 
ParameterNameValueTypeIt begin; ParameterNameValueTypeIt end; hdr->GetNameValIterators(begin, end); OutputParameters(begin, end); int nParents = hdr->GetParentCnt(); for (int iParent=0; iParentGetParent(iParent); OutputDataHeaderParameters(&parent, false); } } /* * Output the set information, only the header. */ void CalvinToText::OutputSetInfo(GenericData &gdata, int groupIndex, int setIndex) { DataSet *pSet = gdata.DataSet(groupIndex, setIndex); // if (!g_bNoHeader) { m_file.writeLine("#%SetName=" + StringUtils::ConvertWCSToMBS(pSet->Header().GetName())); // Add the row and column count. m_file.writeLine("#%Columns=" + ::getInt(pSet->Cols())); m_file.writeLine("#%Rows=" + ::getInt(pSet->Rows())); } // Add the parameters. ParameterNameValueTypeConstIt begin; ParameterNameValueTypeConstIt end; pSet->Header().GetNameValIterators(begin, end); OutputParameters(begin, end); OutputData(gdata, groupIndex, setIndex, 0, -1); pSet->Delete(); } /* * Output the group information. */ void CalvinToText::OutputGroupInfo(GenericData &gdata, const std::wstring &groupName, int groupIndex) { // if (!g_bNoHeader) { m_file.writeLine("#%GroupName=" + StringUtils::ConvertWCSToMBS(groupName)); } // Add each set. 
int iSetCount = gdata.DataSetCnt(groupIndex); for (int i=0; i // using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_parameter; class CalvinToTextFile { public: CalvinToTextFile(); virtual ~CalvinToTextFile(); bool open(const AffxString& strFileName); bool isOpen(); void write(const char *psz); void writeLine(const AffxString& str); void close(void); protected: std::fstream* m_pfileStream; char* m_cb; }; class CalvinToText { protected: AffxString m_strFileNameIn; AffxString m_strFileNameOut; CalvinToTextFile m_file; bool m_bHeader; int m_iXChromosome; int m_iYChromosome; public: CalvinToText() { m_bHeader = false; m_iXChromosome = -1; m_iYChromosome = -1; } void run(const std::string& strFileNameIn, const std::string& strFileNameOut, bool bHeader, bool bBody, bool bNewGuid); protected: void error(const AffxString& strMessage); /* * Read the file into a data object. */ bool ReadFile(const std::string &fileName, GenericData &gdata); /* * Output the data to the command line. */ void OutputData(GenericData& gdata, int groupIndex, int setIndex, int startRow, int endRow); /* * Output the file header parameters. */ void OutputFileHeaderParameters(GenericData &gdata, bool bNewGuid); /* * Output the header parameters. */ void OutputParameters(ParameterNameValueTypeIt &begin, ParameterNameValueTypeIt &end); /* * Output the header parameters. */ void OutputParameters(ParameterNameValueTypeConstIt &begin, ParameterNameValueTypeConstIt &end); /* * Output the data header parameters. */ void OutputDataHeaderParameters(GenericDataHeader *hdr, bool bNewGuid); /* * Output the set information, only the header. */ void OutputSetInfo(GenericData &gdata, int groupIndex, int setIndex); /* * Output the group information. */ void OutputGroupInfo(GenericData &gdata, const std::wstring &groupName, int groupIndex); /* * Output the groups and sets. */ void OutputGroupsAndSets(GenericData &gdata); /* * Output the header information. 
*/ void OutputHeader(const std::string &strFileName, affymetrix_calvin_io::GenericData& genericData, bool bNewGuid); bool isCalvinFile(const AffxString& strFileName); }; #endif // _UTIL_CALVINTOTEXT_H_ affxparser/src/fusion/util/CelCheck.h0000644000175200017520000001471614516003651020630 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file CelCheck.h * @author Alan Williams * @date Tue July 11 18:03:02 2007 * * @brief Class for doing a check of two CEL files after regression run. */ #ifndef CELCHECK_H #define CELCHECK_H #include "calvin_files/fusion/src/FusionCELData.h" #include "calvin_files/utils/src/StringUtils.h" #include "util/Fs.h" #include "util/RegressionCheck.h" #include "util/Util.h" #include "util/Verbose.h" // #include #include #include #include #include #include #include #include #include // using namespace affymetrix_fusion_io; /** * Class for testing that CEL files are the same +/- some epsilon. Also checks * to make sure that at least some of the headers are the same * (times/dates/versions change so not checked). 
*/ class CelCheck : public RegressionCheck { public: /** * @brief Constructor * @param generated * @param gold the reference data to compare with * @param eps epsilon * @param prefix * @param diffAllowed * @return */ CelCheck( std::vector &generated, std::vector &gold, double eps, const std::string &prefix="affymetrix-", int diffAllowed=0) { m_Name = "CEL-Check"; m_Generated = generated; m_Gold = gold; m_Eps = eps; m_Prefix = prefix; m_DiffAllowed = diffAllowed; } CelCheck(std::string &generated, std::string &gold, double eps, const std::string &prefix ="affymetrix-", int diffAllowed = 0) { m_Generated.push_back(generated); m_Gold.push_back(gold); m_Eps = eps; m_Prefix = prefix; m_DiffAllowed = diffAllowed; } /** * Check to make sure that two files are the same +/- some epsilon. * @param msg - Fills in an error message if test fails, empty string otherwise. * @return - Returns true if files are close enough, false otherwise. */ bool checkFilePair(int genIdx, int goldIdx, std::string &msg) { bool success = true; try { // currently the cel file readers do not appear to like the "\\?\" , use normalizePath and not convertToUncPath m_Generated[genIdx] = Fs::normalizePath(m_Generated[genIdx]); m_Gold[goldIdx] = Fs::normalizePath(m_Gold[goldIdx]); if(!headersSame(m_Generated[genIdx], m_Gold[goldIdx], msg)) success = false; if(!dataSame(m_Generated[genIdx], m_Gold[goldIdx], msg)) { success = false; } } // end try catch(Except &e) { success &= checkMsg(false, "Error: " + ToStr(e.what()),msg); } catch(affymetrix_calvin_exceptions::CalvinException &ce) { success &= checkMsg(false, "Error: AGCC library exception: " + StringUtils::ConvertWCSToMBS(ce.Description()),msg); } catch(const std::exception &e) { success &= checkMsg(false, "Error: standard exception: " + ToStr(e.what()),msg); } catch(...) 
{ success &= checkMsg(false, "Error: Uncaught Exception.",msg); } return success; } bool check(std::string &msg) { bool success = true; if(m_Generated.size() != m_Gold.size()) { return checkMsg(false, "CelCheck::check() - generated and gold vectors must be same size.",msg); } if(m_Generated.size() == 1 && m_Gold.size() == 1) { success = checkFilePair(0, 0, msg); } else { for(unsigned int i = 0; i < m_Generated.size(); i++) { success = checkFilePair(i, i, msg); } } return success; } private: static void fillInToIgnore(std::set &ignoreMap, const std::string &prefix) { ignoreMap.clear(); ignoreMap.insert("analysis-guid"); } bool headersSame(const std::string &generated, const std::string &gold, std::string &msgs) { ///@todo implement cel header check once we have new regression data return true; } bool dataSame(const std::string &generated, const std::string &gold, std::string &msgs) { bool success = true; double maxDiff = -1; int numDiff = 0; FusionCELData generatedCel, goldCel; goldCel.SetFileName (gold.c_str()); if (! goldCel.Read()) { msgs += "Can't read cel file: " + ToStr(gold); return false; } generatedCel.SetFileName (generated.c_str()); if (! generatedCel.Read()) { msgs += "Can't read cel file: " + ToStr(generated); return false; } const int numCells = goldCel.GetNumCells(); for (int celIx = 0; celIx < numCells; celIx++) { bool localSuccess = true; checkFloat(goldCel.GetIntensity(celIx), generatedCel.GetIntensity(celIx), m_Eps, localSuccess, maxDiff); if(!localSuccess) { numDiff++; } success &= localSuccess; } if(maxDiff > m_Eps) { Verbose::out(1, "Max diff: " + ToStr(maxDiff) + " is greater than expected (" + ToStr(m_Eps) + ")"); Verbose::out(1, ToStr(numDiff) + " of " + ToStr(numCells) + " (" + ToStr(100.0 * numDiff/numCells) + "%) were different."); } if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) { msgs += "Error: " + generated + " is different from " + gold + ". 
"; } std::string res = "different"; if(success) res = "same"; Verbose::out(1, generated + ToStr(" chip is ") + res + " max diff is: " + ToStr(maxDiff)); return success; } std::vector m_Generated; std::vector m_Gold; double m_Eps; std::string m_Prefix; int m_DiffAllowed; }; #endif /* CELCHECK_H */ affxparser/src/fusion/util/ChpCheck.h0000644000175200017520000004155514516003651020640 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file ChpCheck.h * @author Chuck Sugnet * @date Tue Apr 25 18:03:02 2006 * * @brief Class for doing a check of two CHP files after regression run. */ #ifndef CHPCHECK_H #define CHPCHECK_H // #include "util/Fs.h" #include "util/RegressionCheck.h" #include "util/Util.h" #include "util/Verbose.h" // #include "file/CHPFileData.h" // #include #include #include #include #include #include #include #include #include /** * Class for testing that CHP files are the same +/- some epsilon. Also checks * to make sure that at least some of the headers are the same * (times/dates/versions change so not checked). 
*/ class ChpCheck : public RegressionCheck { public: /// @brief Constructor /// @param generated /// @param gold the reference data to compare with /// @param diffAllowed the number of differences allowed (default 0) /// @param prefix the prefix for the header string alg part /// @param eps epsilon [confidence, pvalue] /// i.e. if |gen-gold| >= frac*max(|gen|,|gold|) then there is a difference. /// @param bCheckHeaders Check headers? (boolean, default: true). /// @param frac maximum accepted fractional difference in numeric values (not used by default). /// i.e. if |gen-gold| >= frac*max(|gen|,|gold|) then there is a difference. ChpCheck(std::vector &generated, std::vector &gold, int diffAllowed=0, const std::string &prefix="apt-", double eps=0.0001, bool bCheckHeaders = true, double frac=0.0) { m_Name = "XDA-CHP-Check"; m_Generated = generated; m_Gold = gold; m_Eps_confidence = eps; m_Eps_pvalue = eps; m_Eps_signal = eps; m_Frac_confidence = frac; m_Frac_pvalue = frac; m_Frac_signal = frac; m_DiffAllowed = diffAllowed; m_Prefix = prefix; m_CheckHeaders = bCheckHeaders; fillInToIgnore(m_IgnoreMap, prefix); // things know to change like user, time, etc. setMaxError(30); } ChpCheck(const std::string &generated, const std::string &gold, int diffAllowed=0, const std::string &prefix="apt-", double eps=0.0001, bool bCheckHeaders = true, double frac=0.0) { m_Generated.push_back(generated); m_Gold.push_back(gold); m_Eps_confidence = eps; m_Eps_pvalue = eps; m_Eps_signal = eps; m_Frac_confidence = frac; m_Frac_pvalue = frac; m_Frac_signal = frac; m_DiffAllowed = diffAllowed; m_Prefix = prefix; m_CheckHeaders = bCheckHeaders; fillInToIgnore(m_IgnoreMap, prefix); // things know to change like user, time, etc. 
setMaxError(30); } bool checkFilePair(int genIdx, int goldIdx, std::string &msg) { bool success = true; try { m_Generated[genIdx] = Fs::convertToUncPath(m_Generated[genIdx]); m_Gold[goldIdx] = Fs::convertToUncPath(m_Gold[goldIdx]); if (m_CheckHeaders && !headersSame(m_Generated[genIdx], m_Gold[goldIdx], msg)) success = false; if(!dataSame(m_Generated[genIdx], m_Gold[goldIdx], msg)) { success = false; } } // end try catch(Except &e) { success &= checkMsg(false, "Error: " + ToStr(e.what()),msg); } catch(const std::exception &e) { success &= checkMsg(false, "Error: standard exception: " + ToStr(e.what()),msg); } catch(...) { success &= checkMsg(false, "Error: Uncaught Exception.",msg); } return success; } /** * Check to make sure that two files are the same +/- some epsilon. * @param msg - Fills in an error message if test fails, empty string otherwise. * @return - Returns true if files are close enough, false otherwise. */ bool check(std::string &msg) { bool success = true; if(m_Generated.size() != m_Gold.size()) { return checkMsg(false, "CelCheck::check() - generated and gold vectors must be same size.",msg); } if(m_Generated.size() == 1 && m_Generated.size() == 1) { success = this->checkFilePair(0,0,msg); } else { for(unsigned int i = 0; i < m_Generated.size(); i++) { success = this->checkFilePair(i,i,msg); } } return success; } /** * Clear out the default set of header entries to ignore */ void clearHeaderIgnore() { m_IgnoreMap.clear(); } /** * Add a header item to ignore * @param key - reference to a wide string */ void addHeaderIgnore(std::string &key) { m_IgnoreMap.insert(key); } private: // Header entries to ignore static void fillInToIgnore(std::set &ignoreMap, const std::string &prefix) { ignoreMap.clear(); ignoreMap.insert("program-version"); ignoreMap.insert(prefix + "exec-guid"); ignoreMap.insert(prefix + "analysis-guid"); ignoreMap.insert(prefix + "time-str"); ignoreMap.insert(prefix + "free-mem"); ignoreMap.insert(prefix + "cvs-id"); 
ignoreMap.insert(prefix + "version"); ignoreMap.insert(prefix + "opt-out-dir"); } /** * Check the gold tag value pairs to make sure they are getting the * same values in the test. We only check items that are in gold, * which means that if a new field has been added it will not be * checked. * * @param gold - List of key, value pairs in the gold set. * @param test - List of key, value pairs in the set to be checked. * @param msg - Place for adding error messages as they are encountered. * * @return - true if same, false otherwise. */ bool tagValuePairMostlySame(TagValuePairTypeList &gold, TagValuePairTypeList &test, std::string &msg) { bool same = true; std::map testMap; std::map::iterator testMapIter; TagValuePairTypeList::iterator testIter; // Load up test as a map which will be queried by items in gold. for(testIter = test.begin(); testIter != test.end(); ++testIter) { testMap[testIter->Tag] = testIter->Value; } TagValuePairTypeList::iterator goldIter; for(goldIter = gold.begin(); goldIter != gold.end(); ++goldIter) { // ignore items that are in the ignoreMap if(m_IgnoreMap.find(goldIter->Tag) == m_IgnoreMap.end()) { testMapIter = testMap.find(goldIter->Tag); if(testMapIter == testMap.end()) { msg += " Error: Test missing field: '" + ToStr(goldIter->Tag) + "'"; same = false; } else { bool isTestNum = false; bool isGoldNum = false; float testNum, goldNum; testNum = Convert::toFloatCheck(testMapIter->second,&isTestNum); goldNum = Convert::toFloatCheck(goldIter->Value,&isGoldNum); if(isTestNum && isGoldNum) { bool success = true; double diff = 0.0; if(!checkFloat(goldNum, testNum, m_Eps_confidence, success, diff, false, 0.0)) { msg += " Error: for field '" + goldIter->Tag + "' expecting: '" + goldIter->Value + "' got: '" + testMapIter->second + "'"; same = false; } } else { if(testMapIter->second != goldIter->Value && Fs::basename(testMapIter->second) != Fs::basename(goldIter->Value)) { msg += " Error: for field '" + goldIter->Tag + "' expecting: '" + 
goldIter->Value + "' got: '" + testMapIter->second + "'"; same = false; } } } } } return same; } bool headersSame(const std::string &generated, const std::string &gold, std::string &msgs) { bool success = true; affxchp::CCHPFileData generatedChp, goldChp; affxchp::CCHPFileHeader genHdr, goldHdr; generatedChp.SetFileName(generated.c_str()); goldChp.SetFileName(gold.c_str()); if(!generatedChp.ReadHeader()) { success = false; success &= checkMsg(false, "Error: Can't read CHP Header in '" + ToStr(generated) + "' error is: " + generatedChp.GetError(), msgs); return success; } if(!goldChp.ReadHeader()) { success = false; success &= checkMsg(false, "Error: Can't read CHP Header in '" + ToStr(gold) + "' error is: " + goldChp.GetError(), msgs); return success; } if(success) { genHdr = generatedChp.GetHeader(); goldHdr = goldChp.GetHeader(); success &= checkMsg(genHdr.GetCols() == goldHdr.GetCols(), "Error: cols not the same. ", msgs); success &= checkMsg(genHdr.GetRows() == goldHdr.GetRows(), "Error: rows not the same. ", msgs); success &= checkMsg(genHdr.GetNumProbeSets() == goldHdr.GetNumProbeSets(), "Error: NumProbeSets not the same. ", msgs); success &= checkMsg(genHdr.GetChipType() == goldHdr.GetChipType(), "Error: ChipType not the same. ", msgs); success &= checkMsg(genHdr.GetAlgVersion() == goldHdr.GetAlgVersion(), "Error: AlgVersion not the same. ", msgs); success &= checkMsg(genHdr.GetProgID() == goldHdr.GetProgID(), "Error: ProgID not the same. ", msgs); success &= checkMsg(genHdr.GetAssayType() == goldHdr.GetAssayType(), "Error: Assay Type not the same. 
", msgs); TagValuePairTypeList &goldList = goldHdr.AlgorithmParameters(); TagValuePairTypeList &genList = genHdr.AlgorithmParameters(); bool paramSame = tagValuePairMostlySame(goldList, genList, msgs); success &= paramSame; goldList = goldHdr.SummaryParameters(); genList = genHdr.SummaryParameters(); paramSame = tagValuePairMostlySame(goldList, genList, msgs); success &= paramSame; } return success; } bool dataSame(const std::string &generated, const std::string &gold, std::string &msgs) { bool success = true; double maxDiffConf = -1; double maxDiffPval = -1; double maxDiffSignal = -1; int numDiff = 0; affxchp::CCHPFileData generatedChp, goldChp; affxchp::CCHPFileHeader genHdr, goldHdr; generatedChp.SetFileName(generated.c_str()); goldChp.SetFileName(gold.c_str()); if(!generatedChp.Read()) { success = false; success &= checkMsg(false, "Error: Can't read CHP '" + ToStr(generated) + "' error is: " + generatedChp.GetError(), msgs); return success; } if(!goldChp.Read()) { success = false; success &= checkMsg(false, "Error: Can't read CHP '" + ToStr(gold) + "' error is: " + goldChp.GetError(), msgs); return success; } genHdr = generatedChp.GetHeader(); goldHdr = goldChp.GetHeader(); if (genHdr.GetAssayType() != goldHdr.GetAssayType()) {success &= checkMsg(false, "Error: Assay Types are not the same. Gold: " + gold + " " + ToStr(goldHdr.GetAssayType()) + " Generated: " + generated + " " + ToStr(genHdr.GetAssayType()), msgs); return success;} if (genHdr.GetNumProbeSets() != goldHdr.GetNumProbeSets()) {success &= checkMsg(false, "Error: Number of Probe Sets are not the same. 
Gold: " + gold + " " + ToStr(goldHdr.GetNumProbeSets()) + " Generated: " + generated + " " + ToStr(genHdr.GetNumProbeSets()), msgs); return success;} if(genHdr.GetAssayType() == affxchp::CCHPFileHeader::Genotyping) { for(int i = 0; i < genHdr.GetNumProbeSets(); i++) { bool localSuccess = true; affxchp::CGenotypeProbeSetResults *genResults = generatedChp.GetGenotypingResults(i); if(genResults == NULL) Err::errAbort("Failed to get entry for test (" + ToStr(i) + "). File: " + generated + " Error: " + generatedChp.GetError()); affxchp::CGenotypeProbeSetResults *goldResults = goldChp.GetGenotypingResults(i); if(goldResults == NULL) Err::errAbort("Failed to get entry for gold (" + ToStr(i) + "). File: " + gold + " Error: " + generatedChp.GetError()); if(goldResults->AlleleCall != genResults->AlleleCall) localSuccess = false; checkFloat(goldResults->Confidence, genResults->Confidence, m_Eps_confidence, localSuccess, maxDiffConf, false, m_Frac_confidence); checkFloat(goldResults->pvalue_AA, genResults->pvalue_AA, m_Eps_pvalue, localSuccess, maxDiffPval, false, m_Frac_pvalue); checkFloat(goldResults->pvalue_AB, genResults->pvalue_AB, m_Eps_pvalue, localSuccess, maxDiffPval, false, m_Frac_pvalue); checkFloat(goldResults->pvalue_BB, genResults->pvalue_BB, m_Eps_pvalue, localSuccess, maxDiffPval, false, m_Frac_pvalue); checkFloat(goldResults->pvalue_NoCall, genResults->pvalue_NoCall, m_Eps_pvalue, localSuccess, maxDiffPval, false, m_Frac_pvalue); ///@todo check RAS fields //checkFloat(goldResults->RAS1, genResults->RAS1, m_Eps_ras, localSuccess, maxDiffRas, false, m_Frac_ras); //checkFloat(goldResults->RAS2, genResults->RAS2, m_Eps_ras, localSuccess, maxDiffRas, false, m_Frac_ras); if(!localSuccess) numDiff++; success &= localSuccess; } if(maxDiffConf > m_Eps_confidence) Verbose::out(1, "Max diff: " + ToStr(maxDiffConf) + " is greater than expected (" + ToStr(m_Eps_confidence) + ") [confidence]"); if(maxDiffPval > m_Eps_pvalue) Verbose::out(1, "Max diff: " + ToStr(maxDiffPval) 
+ " is greater than expected (" + ToStr(m_Eps_pvalue) + ") [pvalue]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(genHdr.GetNumProbeSets()) + " (" + ToStr(100.0 * numDiff/genHdr.GetNumProbeSets()) + "%) were different."); std::string res = "different"; if(success) res = "same"; Verbose::out(1, generated + ": checked " + ToStr(genHdr.GetNumProbeSets()) + " genotype entries."); Verbose::out(1, generated + ": max confidence diff is: " + ToStr(maxDiffConf)); Verbose::out(1, generated + ": max pvalue diff is: " + ToStr(maxDiffPval)); Verbose::out(1, generated + ": chip is " + res + "."); } else if(genHdr.GetAssayType() == affxchp::CCHPFileHeader::Expression) { for(int i = 0; i < genHdr.GetNumProbeSets(); i++) { bool localSuccess = true; affxchp::CExpressionProbeSetResults *genResults = generatedChp.GetExpressionResults(i); affxchp::CExpressionProbeSetResults *goldResults = goldChp.GetExpressionResults(i); checkFloat(goldResults->Signal, genResults->Signal, m_Eps_signal, localSuccess, maxDiffSignal, false, m_Frac_signal); ///@todo check other fields if(!localSuccess) numDiff++; success &= localSuccess; } if(maxDiffSignal > m_Eps_signal) Verbose::out(1, "Max diff: " + ToStr(maxDiffSignal) + " is greater than expected (" + ToStr(m_Eps_signal) + ") [signal]"); if(numDiff > 0) Verbose::out(1, ToStr(numDiff) + " of " + ToStr(genHdr.GetNumProbeSets()) + " (" + ToStr(100.0 * numDiff/genHdr.GetNumProbeSets()) + "%) were different."); std::string res = "different"; if(success) res = "same"; Verbose::out(1, generated + ": checked " + ToStr(genHdr.GetNumProbeSets()) + " expression entries."); Verbose::out(1, generated + ": max signal diff is: " + ToStr(maxDiffSignal)); Verbose::out(1, generated + ": chip is " + res + "."); } else { Err::errAbort("ChpCheck::ChpCheck() - unknown CHP type."); } if(!success && m_DiffAllowed >= numDiff) success = true; if(!success) msgs += "Error: " + generated + " is different from " + gold + ". 
"; return success; } /// Filenames for the gold standard or correct data chp files. std::vector m_Gold; /// Matching filenames for the chp files to be tested. std::vector m_Generated; /// Epsilon, small value that two floats can differ by but still be considered equivalent. double m_Eps_confidence; double m_Eps_pvalue; double m_Eps_signal; double m_Frac_confidence; double m_Frac_pvalue; double m_Frac_signal; bool m_CheckHeaders; /// How many differences will we tolerate? int m_DiffAllowed; /// What is the expected prefix for parameter names? std::string m_Prefix; /// Header entries to ignore std::set m_IgnoreMap; }; #endif /* CHPCHECK_H */ affxparser/src/fusion/util/Convert.cpp0000644000175200017520000003112614516003651021134 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Convert.cpp * @author Chuck Sugnet * @date Wed May 4 15:24:08 2005 * * @brief Definitions for Convert class. * */ // #include "util/Convert.h" // #include "util/Err.h" #include "util/Util.h" // #include #include #include #include #include #include #include #include #include #include // using namespace std; /** * Make a string from an integer. 
* @param i - integer to be converted. * * @return - String version of int i; */ string Convert::toString(int i) { stringstream ss; string str; ss << i; ss >> str; return str; } /** * Make a string from an double * @param d - double to be converted. * * @return - String version of int i; */ std::string Convert::toString(double d) { stringstream ss; string str; ss << d; ss >> str; // we prefer the linux format (inf/nan) to that of the windows output. // convert the strings to our perfered format. if (str == "-Inf") { str = "-inf"; } else if (str == "Inf") { str = "inf"; } #ifdef WIN32 else if (str.size() >= 5) { if (str == "-1.#INF") { str="-inf"; } else if (str == "1.#INF") { str="inf"; } else if (str == "-1.#IND") { str="nan"; } else if (str == "1.#IND") { str="nan"; } // For Windows, check to see if a three digit exponent with a leading zero // is being used for double representation. if so, remove the extra zero // (2 exponent digits conforms to 1999 C standard) // e.g. 1.234e003 and 1.234e-003 become 1.234e03 and 1.234e-03 else if (str.at(str.size() - 3) == '0' && (str.at(str.size() - 4) == 'e' || str.at(str.size() - 5) == 'e') ) { str.erase(str.size() - 3, 1); } } #endif return str; } /** * Make an int from a c string or die trying. * @param num - c string representation of number to be parsed. * * @return - Integer representation of num */ int Convert::toInt(const std::string& num) { bool success = true; int i = Convert::toIntCheck(num, &success); if(success != true) Err::errAbort("Could not convert '" + std::string(num) + "' to an int."); return i; } /** * Make an int from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. 
* @return - Integer representation of num, 0 if success == false */ int Convert::toIntCheck(const std::string& num, bool *success) { long int l = 0; int i = 0; bool ok = true; char *end = NULL; const char* num_c_str=num.c_str(); assert(num_c_str); errno = 0; l = strtol(num_c_str, &end, 10); // end will be NULL if entire string converted. ok = (*end != '\0' || end == num_c_str) ? false : true; // cast to int and make sure that we didn't overflow i = (int)l; if(errno != 0 || (long)i != l) ok = false; // if we had a problem set to 0 for consistency. if(!ok) i = 0; if(success != NULL) (*success) = ok; return i; } /** * Make an uint from a c string or die trying. * @param num - c string representation of number to be parsed. * * @return - Unsigned Integer representation of num */ unsigned int Convert::toUnsignedInt(const std::string& num) { bool success = true; unsigned int i = Convert::toUnsignedIntCheck(num, &success); if(success != true) Err::errAbort("Could not convert '" + std::string(num) + "' to an unsigned int."); return i; } uint64_t Convert::toUnsignedInt64(const std::string& num) { bool success = true; uint64_t i = Convert::toUnsignedInt64Check(num, &success); if(success != true) Err::errAbort("Could not convert '" + std::string(num) + "' to an unsigned int64."); return i; } /** * Make an unsigned int from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Unsigned Integer representation of num, 0 if success == false */ unsigned int Convert::toUnsignedIntCheck(const std::string& num, bool *success) { long unsigned int l = 0; unsigned int i = 0; bool ok = true; char *end = NULL; const char* num_c_str=num.c_str(); assert(num_c_str); errno = 0; l = strtoul(num_c_str, &end, 10); // end will be NULL if entire string converted. ok = (*end != '\0' || end == num_c_str) ? 
false : true; // cast to unsigned int and make sure that we didn't overflow i = (unsigned int)l; if(errno != 0 || (long unsigned int)i != l || l >= UINT_MAX) ok = false; // if we had a problem set to 0 for consistency. if(!ok) i = 0; if(success != NULL) (*success) = ok; return i; } uint64_t Convert::toUnsignedInt64Check(const std::string& num, bool *success) { long unsigned int l = 0; uint64_t i = 0; bool ok = true; char *end = NULL; const char* num_c_str=num.c_str(); assert(num_c_str); errno = 0; l = strtoul(num_c_str, &end, 10); // end will be NULL if entire string converted. ok = (*end != '\0' || end == num_c_str) ? false : true; // cast to unsigned int and make sure that we didn't overflow i = (uint64_t)l; if(errno != 0 || (long unsigned int)i != l || l >= ULONG_MAX) ok = false; // if we had a problem set to 0 for consistency. if(!ok) i = 0; if(success != NULL) (*success) = ok; return i; } /** * Make an float from a c string. * @param num - c string representation of number to be parsed. * * @return - Float representation of num */ float Convert::toFloat(const std::string& num) { bool success = true; float f = toFloatCheck(num, &success); if(!success) Err::errAbort("Could not convert '" + std::string(num) + "' to a float."); return f; } /** * Make an float from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Float representation of num */ float Convert::toFloatCheck(const std::string& num, bool *success) { double d = toDoubleCheck(num, success); if(d > FLT_MAX && d < DBL_MAX) { (*success) = false; } return float(d); } /** * Make an double from a c string. * @param num - c string representation of number to be parsed. 
* * @return - Double representation of num */ double Convert::toDouble(const std::string& num){ bool success = true; double d = Convert::toDoubleCheck(num, &success); if(!success) Err::errAbort("Could not convert '" + std::string(num) + "' to a double."); return d; } /** * Make an double from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Double representation of num */ double Convert::toDoubleCheck(const std::string& num, bool *success){ const char* num_c_str=num.c_str(); assert(num_c_str); double d = 0; if (num == "NaN") { d = numeric_limits::quiet_NaN(); *success = true; return d; } char *end = NULL; bool ok = true; errno = 0; d = strtod(num_c_str, &end); // end will point at NULL if conversion successful ok = (*end != '\0' || end == num_c_str) ? false : true; /* errno should be set if over/under flow. ERANGE is a special case though of number not being represetable. For ERANGE just let things be with a warning... from https://www.securecoding.cert.org: "For numeric conversion functions in the strtod(), strtol(), wcstod(), and wcstol() families, if the correct result is outside the range of representable values, an appropriate minimum or maximum value is returned, and the value ERANGE is stored in errno. For floating-point conversion functions in the strtod() and wcstod() families, if an underflow occurs, whether errno acquires the value ERANGE is implementation-defined." */ if (errno != 0 && errno != ERANGE) { ok = false; } if (errno == ERANGE) { Verbose::out(3, "Warning - number: " + num + " is out of range (ERANGE set)"); errno = 0; } /* If not ok set to NAN to be non-usable. */ if (!ok) { d = 0.0; // @todo - check that NAN won't break everything... // d = NAN; } if(success != NULL) (*success) = ok; return d; } /** * Make an bool from a c string. * @param flag - c string representation of number to be parsed. 
* allowed value are 'true' && 'false' * @return - Bool representation of flag */ bool Convert::toBool(const std::string& flag) { bool value = false, success = false; value = Convert::toBoolCheck(flag, &success); if(!success) Err::errAbort("Could not convert '" + std::string(flag) + "' to a boolean.\n" "Valid values are: 'true','false','1','0'."); return value; } /** * Make an bool from a c string. * @param flag - c string representation of number to be parsed. * allowed value are 'true' && 'false' * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Bool representation of flag, false on failure. */ bool Convert::toBoolCheck(const std::string& flag, bool *success) { bool value = false; bool ok = true; const char* flag_c_str=flag.c_str(); assert(flag_c_str); // would like to use strcasecmp, but VC++ doesnt have it if ((strcmp(flag_c_str, "true") == 0) || (strcmp(flag_c_str, "TRUE") == 0) || (strcmp(flag_c_str, "1") == 0)) { value = true; } else if ((strcmp(flag_c_str, "false") == 0) || (strcmp(flag_c_str, "FALSE") == 0) || (strcmp(flag_c_str, "0") == 0)) { value = false; } else ok = false; if(success != NULL) (*success) = ok; return value; } /** * Test to make sure that two doubles are pretty similar. * @param f1 * @param f2 * @return true if doubles are same to 6 significant digits. */ bool Convert::doubleCloseEnough(double d1, double d2, int digits) { double diff = fabs(d1 - d2); if(diff < 1 / (pow(10.0, (double)digits))) return true; return false; } /** * Make a low precision float from a normal one. Used originally to * mimic truncation seen in cel files. * @param f float to be truncated. 
* @return truncated float */ float Convert::floatLowPrecision(float f) { return (float)((floor((f+0.05)*10))/10.0); } void Convert::strToIntVec(const std::string& s,const char delim,std::vector& vec) { std::vector words; Util::chopString(s,delim, words); vec.clear(); vec.resize(words.size()); for(size_t i = 0; i < words.size(); i++) { vec[i] = Convert::toInt(words[i]); } } /** * Convert an integer vector to string */ std::string Convert::intVecToString(const std::vector &inputVector, const std::string &delim) { if (inputVector.empty()) { return ""; } std::string str; for (std::vector::const_iterator it = inputVector.begin(); it != inputVector.end(); ++it) { str += delim + toString(*it); } return str.substr(delim.size()); } /** * Convert a string vector to string. Silly but someone's got to do it. */ std::string Convert::strVecToString(const std::vector &inputVector, const std::string &delim) { if (inputVector.empty()) { return ""; } std::string str; for (std::vector::const_iterator it = inputVector.begin(); it != inputVector.end(); ++it) { str += delim + *it; } return str.substr(delim.size()); } void Convert::strToStrVec(const std::string& s, const char delim, std::vector& vec) { std::vector words; Util::chopString(s,delim, words); vec.clear(); vec.resize(words.size()); for(size_t i = 0; i < words.size(); i++) { vec[i] = words[i]; } } affxparser/src/fusion/util/Convert.h0000644000175200017520000001721114516003651020600 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Convert.h * @author Chuck Sugnet * @date Wed May 4 14:53:03 2005 * * @brief Utility functions for converting between types. */ #ifndef _UTIL_CONVERT_H_ #define _UTIL_CONVERT_H_ // #include "portability/affy-base-types.h" // #include #include #include #include #include // /** * Convert * @brief Utility functions to convert between different types. */ class Convert { public: /** * Make an int from a c string or die trying. * @param num - c string representation of number to be parsed. * * @return - Integer representation of num */ static int toInt(const std::string& num); /** * Make an int from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Integer representation of num */ static int toIntCheck(const std::string& num, bool *success); /** * Make an unsigned int from a c string or die trying. * @param num - c string representation of number to be parsed. * * @return - Unsigned Integer representation of num */ static unsigned int toUnsignedInt(const std::string& num); /** * Make an unsigned int from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Unsigned Integer representation of num, 0 if success == false */ static unsigned int toUnsignedIntCheck(const std::string& num, bool *success); /** * Make an unsigned int from a c string or die trying. * @param num - c string representation of number to be parsed. 
* * @return - Unsigned Integer representation of num */ static uint64_t toUnsignedInt64(const std::string& num); /** * Make an unsigned int from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Unsigned Integer representation of num, 0 if success == false */ static uint64_t toUnsignedInt64Check(const std::string& num, bool *success); /** * Make an float from a c string. * @param num - c string representation of number to be parsed. * * @return - Float representation of num */ static float toFloat(const std::string& num); /** * Make an float from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Float representation of num */ static float toFloatCheck(const std::string& num, bool *success); /** * Make an double from a c string. * @param num - c string representation of number to be parsed. * * @return - Double representation of num */ static double toDouble(const std::string& num); /** * Make an double from a c string. * @param num - c string representation of number to be parsed. * @param success - If success != NULL set to true upon * successful conversion and false upon failur. * @return - Double representation of num */ static double toDoubleCheck(const std::string& num, bool *success); /** * Make an bool from a c string. * @param flag - c string representation of number to be parsed. * allowed value are 'true' && 'false' * * @return - Bool representation of flag */ static bool toBool(const std::string& flag); /** * Make an bool from a c string. * @param flag - c string representation of number to be parsed. * allowed value are 'true' && 'false' * @param success - If success != NULL set to true upon * successful conversion and false upon failur. 
* @return - Bool representation of flag, false on failure. */ static bool toBoolCheck(const std::string& flag, bool *success); /** * Make a string from an integer. * @param i - integer to be converted. * * @return - String version of int i; */ static std::string toString(int i); /** * Make a string from an double * @param d - double to be converted. * * @return - String version of int i; */ static std::string toString(double d); /** * Test to make sure that two doubles are pretty similar. * @param d1 * @param d2 * @param digits - number of decimal places to examine. * @return true if doubles are same to digits decimal places. */ static bool doubleCloseEnough(double d1, double d2, int digits=6); /** * Make a low precision float from a normal one. Used originally to * mimic truncation seen in cel files. * @param f float to be truncated. * @return truncated float */ static float floatLowPrecision(float f); /** * Utility function to cut a delimited string into a vector of ints. * * @param s - delimited string. * @param delim - delimiter to chop on. * @param vec - vector to be filled in. */ static void strToIntVec(const std::string &s,char delim,std::vector &vec); /** * Utility function to cut a delimited string into a vector of strings. * * @param s - delimited string. * @param delim - delimiter to chop on. * @param vec - vector to be filled in. 
*/ static void strToStrVec(const std::string& s, const char delim, std::vector& vec); /** * The inverse of strToIntVec * * @param inputVector - the vector to convert * @param delim - delimiter to use * @return - the resulting string */ static std::string intVecToString(const std::vector &inputVector, const std::string &delim); /** * Same as intVecToString but using string vectors * * @param inputVector - the vector to convert * @param delim - delimiter to use * @return - the resulting string */ static std::string strVecToString(const std::vector &inputVector, const std::string &delim); }; /** Little template function to make string conversion easy. this isn't the fastest way to do things, but it is easy. */ template std::string ToStr(const T &t) { std::ostringstream s; s.setf(std::ios_base::boolalpha); s << t; return s.str(); } /// @brief Convert the arg to a string. /// @param arg which is a string already. /// @return a string /// @remark Since it is a string already, no conversion needs to be done. /// ToStr returns a new string, so we match that. // template <> inline std::string ToStr(const std::string& str) { std::string newstr=str; return newstr; } inline std::string ToStr(double t) { return Convert::toString(t); } inline std::string ToStr(float t) { return Convert::toString(static_cast(t)); } #endif /* _UTIL_CONVERT_H_ */ affxparser/src/fusion/util/DotProgress.cpp0000644000175200017520000000237314516003651021771 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "util/DotProgress.h" // using namespace std; DotProgress::DotProgress(std::ostream *strm) { outstr = strm; } void DotProgress::SetStepProperties(int level, int mx, int inc) { verbosity = level; dotMod = inc; dotCount = dotMod; } void DotProgress::Step(int level) { if (level <= verbosity) { --dotCount; if (dotCount <= 0) { dotCount = dotMod; outstr->put('.'); outstr->flush(); } } } affxparser/src/fusion/util/DotProgress.h0000644000175200017520000000404214516003651021431 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /* \file DotProgress.h Provides a base class for displaying progress. */ #ifndef _DotProgress_HEADER_ #define _DotProgress_HEADER_ #include "util/Progress.h" // #include #include // /*! Provides a class for showing progress with dots written to an output stream. 
*/ class DotProgress : public Progress { private: /*! Where to stream the messages. */ std::ostream *outstr; /*! What level of messages is wanted, larger num == more msgs */ int verbosity; /*! How often do we print a dot when dot() is called? */ int dotMod; /*! How many times has dot() been called? */ int dotCount; public: /*! Constructor * @param strm Where to stream the dots */ DotProgress(std::ostream *strm = &std::cout); ~DotProgress() { } /*! Sets the upper limit of the range of the progress display. * @param level - level of verbosity desired. * @param mx The maximum number of steps in the task. * @param inc The number of steps performed before the progress display is updated. */ void SetStepProperties(int level, int mx, int inc); /*! Steps the progress ahead by one count. * @param level - What level of verbosity this message should be printed at. */ void Step(int level); }; #endif affxparser/src/fusion/util/Engines.h0000644000175200017520000000442214516003651020550 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2009 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "canary/CanaryEngine.h" #include "chipstream/apt-geno-qc/GenoQC.h" #include "chipstream/apt-probeset-genotype/ProbesetGenotypeEngine.h" #include "chipstream/apt-probeset-summarize/ProbesetSummarizeEngine.h" #include "chipstream/apt-summary-genotype/SummaryGenotypeEngine.h" #include "copynumber/CNAnalysisEngine.h" #include "copynumber/CNCytoEngine.h" #include "copynumber/CNFamilialEngine.h" #include "copynumber/CNLog2RatioEngine.h" #include "copynumber/CNReferenceEngine.h" #include "copynumber/CNWorkflowEngine.h" #include "dmet/DmetCHPWriter.h" #include "dmet/DmetCopyNumberEngine.h" #include "dmet/DmetEngine.h" #include "mas5-stat/apt-mas5/MAS5Engine.h" #include "translation/TranslationEngine.h" namespace affx { // This is basically a hack to work around the use of .a files // In short, unless we explicitly pull in the engines, the static // vars will not be pulled into the binaries and initialized void touchEngines() { { CanaryEngine engine; } { GenoQC engine; } { ProbesetGenotypeEngine engine; } { ProbesetSummarizeEngine engine; } { SummaryGenotypeEngine engine; } { CNAnalysisEngine engine; } { CNCytoEngine engine; } { CNFamilialEngine engine; } { CNLog2RatioEngine engine; } { CNReferenceEngine engine; } { CNWorkflowEngine engine; } { DmetCHPWriter engine; } { DmetCopyNumberEngine engine; } { DmetEngine engine; } { MAS5Engine engine; } { TranslationEngine engine; } } } affxparser/src/fusion/util/Err.cpp0000644000175200017520000001375714516003651020256 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Err.cpp * @author Chuck Sugnet * @date Wed May 4 14:57:32 2005 * * @brief Some utility error functions, currently prints message and aborts() * */ // #include "util/Err.h" // #include #include #include #include #include #include #include #include using namespace std; std::string Err::m_errorPrefix="FATAL ERROR:"; void Err::setErrorPrefix(const std::string& prefix) { m_errorPrefix=prefix; } /** * @brief This function gets around the problem of static variable * initialization as local static variables work more consistently. * @return Param - Our static parameters for this class. */ Err::Param &Err::getParam() { static Param m_Param; return m_Param; } void Err::errAbort(const std::string &msg) { errAbort(msg,m_errorPrefix); } /** * Print the message supplied and abort program. * @param msg - Message for user about what went wrong. * @param prefix - Prefix to add to the error message. */ void Err::errAbort(const std::string& msg,const std::string& prefix){ // throw (Except) { unsigned int size = getParam().m_ErrHandlers.size(); if(size <= 0) { cout << "Can't have no error handlers." 
<< endl; } std::string errMsg = prefix + msg;; // GUI's do not like the newline if(getParam().m_NewLineOnError) { errMsg = "\n" + errMsg; } getParam().m_ErrHandlers[size - 1]->handleError(errMsg); } /** * Add a new handler for errors. * @param handler - Pointer to handler to call when things go wrong. */ void Err::pushHandler(ErrHandler *handler) { getParam().m_ErrHandlers.push_back(handler); } /** * Pop off the current handler and return it for cleanup, etc. * @return - last handler pushed onto the stack. */ ErrHandler *Err::popHandler() { unsigned int count = getParam().m_ErrHandlers.size(); if(count < 1) { Err::errAbort("Err::popHandler() - can't pop error handler when there aren't any left."); } ErrHandler *handler = getParam().m_ErrHandlers[count - 1]; getParam().m_ErrHandlers.pop_back(); return handler; } /** * Give the current error handler a hint about whether or not to * throw exceptions. Note that this is just a hint and that if a * specifical behavior is really required it is best to push a * custom error handler onto the stack. * * @param doThrow - if true requesting exceptions to be thrown, if false not to be thrown */ void Err::setThrowStatus(bool doThrow) { int size = getParam().m_ErrHandlers.size(); for (int i = 0; i < size; i++) { ErrHandler *handler = getParam().m_ErrHandlers[i]; handler->setThrows(doThrow); } } /** * Return true if curerntly configured to throw exceptions on error, false otherwise. 
*/ bool Err::getThrowStatus() { ErrHandler *handler = getParam().m_ErrHandlers.back(); return handler->getThrows(); } /** * Configure new error handler * * @param doThrow - should an exception be thrown, or call exit * @param verbose - should the error handler call verbose methods */ void Err::configureErrHandler(bool doThrow, bool verbose, bool exitOnError, int exitOnErrorValue) { VerboseErrHandler *handler = new VerboseErrHandler(doThrow, verbose, exitOnError, exitOnErrorValue); pushHandler(handler); } /** * Toggle whether we are throwing exceptions or exiting on errors. * * @param doThrow - if true exceptions will be thrown, if false exit(1) called. */ void Err::setNewLineOnError(bool newline) { getParam().m_NewLineOnError = newline; } void Err::apt_err_assert(const std::string& file,int line, const std::string& condstr, bool cond, const std::string& msg) { if (!cond) { std::string errmsg=file+":"+ToStr(line)+": '"+condstr+"'==false"; if (msg!="") { errmsg+=" "+msg; } errAbort(errmsg); } } void Err::apt_err_abort(const std::string& file,int line, const std::string& msg) { std::string errmsg=file+":"+ToStr(line)+": "+msg; errAbort(errmsg); } /** * Toggle whether to set the value returned when exiting on errors * This is primarily used to suppress Windows OS handling of * return values indicating an exe did not terminate successfully * if val is true, the value in setExitOnErrorValue will be * returned. 
*/ void Err::setExitOnError(bool val) { int size = getParam().m_ErrHandlers.size(); if (size==0) { Verbose::warn(1,"Err::setExitOnError(): called without a handler present."); } for (int i = 0; i < size; i++) { ErrHandler *handler = getParam().m_ErrHandlers[i]; handler->setExitOnError(val); } } /** * Value to return when the flag setExitOnError is true and * exiting on errors */ void Err::setExitOnErrorValue(int val) { int size = getParam().m_ErrHandlers.size(); if (size==0) { Verbose::warn(1,"Err::setExitOnErrorValue(): called without a handler present."); } for (int i = 0; i < size; i++) { ErrHandler *handler = getParam().m_ErrHandlers[i]; handler->setExitOnErrorValue(val); } } affxparser/src/fusion/util/Err.h0000644000175200017520000001511714516003651017713 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Err.h * @author Chuck Sugnet * @date Wed May 4 14:57:32 2005 * * @brief Some utility error functions, currently prints message and aborts() * */ #ifndef _UTIL_ERR_H_ #define _UTIL_ERR_H_ // #include "portability/apt-win-dll.h" #include "util/Convert.h" #include "util/ErrHandler.h" #include "util/Except.h" #include "util/Verbose.h" #include "util/VerboseErrHandler.h" // #include #include #include #include #include #include #include // /// Calls Err::apt_err_abort with the filename and linenumber set. #define APT_ERR_ABORT(_Why) { Err::apt_err_abort(__FILE__,__LINE__,_Why); } /// Calls Err::apt_err_assert with the filename and linenumber set. /// We want to avoid evaluating the message unless the condition is false. // this could be cleaned up to avoid the "0". #define APT_ERR_ASSERT(_Cond,_Msg) { if (!(_Cond)) { Err::apt_err_assert(__FILE__,__LINE__,#_Cond,0,_Msg); } } /** * Err * @brief Common abort with message function for programs to call. */ class APTLIB_API Err { public: static std::string m_errorPrefix; static void setErrorPrefix(const std::string& prefix); /** Utility class for having static parameters across all instances. */ class Param { public: /** By default add an error handler that calls verbose. */ Param() { VerboseErrHandler *handler = new VerboseErrHandler(); m_ErrHandlers.push_back(handler); m_NewLineOnError = true; } /** Cleanup any error handlers that are still around. */ ~Param() { for(unsigned int i = 0; i < m_ErrHandlers.size(); i++) { delete m_ErrHandlers[i]; } } std::vector m_ErrHandlers; ///< vector of handlers bool m_NewLineOnError; ///< Should we print a newline before error message? 
}; /** * @brief This function gets around the problem of static variable * initialization as local static variables work more consistently. * @return Param - Our static parameters for this class. */ static Param &getParam(); /** * Print the message supplied and abort program. * @param msg - Message for user about what went wrong. * @param prefix - Prefix to add to the error message. */ static void errAbort(const std::string &msg, const std::string& prefix); static void errAbort(const std::string &msg); /** * Add a new handler for errors. * @param handler - Pointer to handler to call when things go wrong. */ static void pushHandler(ErrHandler *handler); /** * Pop off the current handler and return it for cleanup, etc. * @return - last handler pushed onto the stack. */ static ErrHandler *popHandler(); /** * errAbort based version of assert with an error message. Try to * use this method in conjunction with a const char * message * (rather than constructing a string every time) when writing a * check that will be called lots of times. * * @param passed - Condition to be checked. * @param msg - Error message if failed. */ inline static void check(bool passed, const char *msg) { if(!passed) { errAbort(msg); } } /** * errAbort based version of assert with an error message. * * @param passed - Condition to be checked. * @param msg - Error message if failed. */ inline static void check(bool passed, const std::string &msg) { if(!passed) { errAbort(msg); } } /** * Give the current error handler a hint about whether or not to * throw exceptions. Note that this is just a hint and that if a * specifical behavior is really required it is best to push a * custom error handler onto the stack. * * @param doThrow - if true requesting exceptions to be thrown, if false not to be thrown */ static void setThrowStatus(bool doThrow); /** * Return true if curerntly configured to throw exceptions on error, false otherwise. 
*/ static bool getThrowStatus(); /** * Configure new error handler * * @param doThrow - should an exception be thrown, or call exit * @param verbose - should the error handler call verbose methods */ static void configureErrHandler(bool doThrow, bool verbose, bool exitOnError=false, int exitOnErrorValue=-1); /** * Toggle whether we are throwing exceptions or exiting on errors. * * @param doThrow - if true exceptions will be thrown, if false exit(1) called. */ static void setNewLineOnError(bool newline); /// @brief The function called by the APT_ERR_ASSERT macro /// @param file Filled in from __FILE__ /// @param line Filled in from __LINE__ /// @param condstr The string form of the conditional /// @param cond The evaluated conditional /// @param msg The message to report on an error static void apt_err_assert(const std::string& file,int line, const std::string& condstr, bool cond, const std::string& msg); /// @brief The function called by the APT_ERR_ABORT macro. /// @param file Filled in from __FILE__ /// @param line Filled in from __LINE__ /// @param msg The message to abort with static void apt_err_abort(const std::string& file,int line, const std::string& msg); /** * Toggle whether to set the value returned when exiting on errors * This is primarily used to suppress Windows OS handling of * return values indicating an exe did not terminate successfully * if val is true, the value in setExitOnErrorValue will be * returned. */ static void setExitOnError(bool val); /** * Value to return when the flag setExitOnError is true and * exiting on errors */ static void setExitOnErrorValue(int val); }; #endif /* _UTIL_ERR_H_ */ affxparser/src/fusion/util/ErrHandler.h0000644000175200017520000000376014516003651021212 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file ErrHandler.h * @author Chuck Sugnet * @date Mon Jun 26 10:13:11 2006 * * @brief Interface for doing something with the errors generated by * Err::errAbort(). */ #ifndef ERRHANDLER_H #define ERRHANDLER_H #include #include // /** * Abstract base class (aka interface) for handling errors that come from * Err::errAbort(). Write a class inheriting from ErrHandler and push it * onto the handler stack via Err::pushErrHandler(). */ class ErrHandler { public: /** Virtual destructor for a virtual class. */ virtual ~ErrHandler() {} /** Prototype for the function */ virtual void handleError(const std::string &msg) = 0; /** Will this error handler be throwing exceptions? */ virtual bool getThrows() { return false; } /** Will this error handler be throwing exceptions? 
*/ virtual void setThrows(bool doThrow) {} /** Will this error handler set the value returned when * exiting on errors */ virtual void setExitOnError(bool val) {} /** The value to return if the error handler returns * specifies the value */ virtual void setExitOnErrorValue(int val) {} }; #endif /* ERRHANDLER_H */ affxparser/src/fusion/util/Except.h0000644000175200017520000000327214516003651020412 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Except.h * @author Chuck Sugnet * @date Tue Nov 8 10:55:45 2005 * * @brief General purpose exception for error handling. */ #ifndef EXCEPT_H #define EXCEPT_H #include #include #include // /** * @brief General purpose exception error for error handling. */ class Except : public std::exception { public: /** * @brief Constructor. * @param msg Message for the user/programmer. */ Except(const std::string &msg) : m_Msg(msg) {} /** * @brief virtual destructor for a base class. * @return */ virtual ~Except() throw() {} /** * @brief Standard what() call, just returns error message. * @return error message. */ virtual const char* what() const throw() { return m_Msg.c_str(); } private: /// Our error message. 
std::string m_Msg; }; #endif /* EXCEPT_H */ affxparser/src/fusion/util/FrugalVector.h0000644000175200017520000000566414516003651021574 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _UTIL_FRUGALVECTOR_H_ #define _UTIL_FRUGALVECTOR_H_ #include // template class FrugalVector { public: typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef size_t size_type; typedef T* iterator; typedef const T* const_iterator; typedef FrugalVector vector_type; T *m_Start; T *m_End; FrugalVector(size_type n, const value_type &val) { m_Start = new T[n]; m_End = m_Start + n; std::fill(m_Start, m_End, val); } FrugalVector(size_type n) { m_Start = new T[n]; m_End = m_Start + n; } FrugalVector() { m_Start = m_End = NULL; } ~FrugalVector() { delete [] m_Start; } inline T* begin() { return m_Start; } inline T* end() { return m_End; } inline const T* begin() const { return m_Start; } inline const T* end() const { return m_End; } inline reference operator[](size_type n) { return *(begin() + n); } inline const_reference operator[](size_type n) const { return *(begin() + n); } void 
resize(size_type n, const T &t) { if(n > size()) { T *t_start = new T[n]; std::copy(m_Start, m_End, t_start); std::fill(t_start + size(), t_start + n, t); delete [] m_Start; m_Start = t_start; m_End = m_Start + n; } else if(n < size() && n != 0) { T * t_start = new T[n]; std::copy(m_Start, m_Start + n, t_start); delete [] m_Start; m_Start = t_start; m_End = m_Start + n; } else if(n == 0) { clear(); } } inline void resize(size_type n) { resize(n, value_type()); } inline void clear() { FreezArray(m_Start); m_End = NULL; } inline void push_back(const value_type& t) { resize(size() + 1); *(m_End - 1) = t; } inline size_type capacity() const { return size_type(m_End - m_Start); } inline bool empty() const { return m_Start == m_End; } inline size_type size() const { return size_type(m_End - m_Start); } }; #endif /* _UTIL_FRUGALVECTOR_H_ */ affxparser/src/fusion/util/Fs.cpp0000644000175200017520000011742714516003651020075 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // affy/sdk/util/Fs.cpp --- // #ifdef _MSC_VER #define _CRT_SECURE_NO_WARNINGS #endif // #include "util/Fs.h" // #include "util/Convert.h" #include "util/Err.h" #include "util/Util.h" // #include #include #include #include #ifdef _WIN32 // Windows doesn't seem to have these defined, so we'll define them // here for utility. Note that with the windows _stat() function all // users have id 0 and group 0. #define S_IRUSR 00400 //owner has read permission #define S_IWUSR 00200 //owner has write permission #define S_IXUSR 00100 ///owner has execute permission #define S_IRWXG 00070 //mask for group permissions #define S_IRGRP 00040 //group has read permission #define S_IWGRP 00020 //group has write permission #define S_IXGRP 00010 //group has execute permission #define S_IRWXO 00007 //mask for permissions for others (not in group) #define S_IROTH 00004 //others have read permission #define S_IWOTH 00002 //others have write permisson #define S_IXOTH 00001 // others have execute permission #endif // stuff for getAvailableDiskSpace #ifdef __APPLE__ #include #include #endif #ifdef __linux__ #include #include #endif #ifdef __sun__ #include #include #endif #ifdef _WIN32 #include #include #include #include #else #include #include #endif // @todo all this should go into portablity/apt-posix.h #ifdef _WIN32 // Microsoft limit is 32767 plus slush 100 // Return and input buffer. 
#define UNC_MAX_PATH_BUFSIZ (32767 + 100) #define POSIX_CHMOD _wchmod #define POSIX_MKDIR _wmkdir #define POSIX_UNLINK _wunlink #ifndef S_ISDIR #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #endif #ifndef S_ISREG #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #endif #else #define POSIX_CHMOD ::chmod #define POSIX_MKDIR ::mkdir #define POSIX_UNLINK ::unlink #endif // cast this to a string. #define FS_ERRNO_STR() std::string("(errno="+ToStr(errno)+")") ////////// /// utility function to fill in a filesystem stat struct. AptErr_t Fs__stat(const std::string& path, struct stat& stat_buf, int& rv, bool abortOnErr) { std::string errorMsg("Fs__stat"); #ifndef _WIN32 rv=stat(path.c_str(),&stat_buf); #else WIN32_FILE_ATTRIBUTE_DATA win_attr_data; rv = 0; std::wstring unc_wpath = Fs::UncW( path ); if ( GetFileAttributesExW(unc_wpath.c_str(), GetFileExInfoStandard, &win_attr_data) ) { memset( &stat_buf, 0 , sizeof(struct stat)); if ( win_attr_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) { stat_buf.st_mode |= S_IFDIR | S_IRUSR; if ( !(win_attr_data.dwFileAttributes & FILE_ATTRIBUTE_READONLY) ) { stat_buf.st_mode |= S_IWUSR; } } else { stat_buf.st_mode |= S_IFREG; stat_buf.st_size = (win_attr_data.nFileSizeHigh * (((int)MAXDWORD)+1)) + win_attr_data.nFileSizeLow; std::fstream fs; Fs::aptOpen(fs, path,std::ios::in|std::ios::binary); if (fs.is_open()) { fs.close(); stat_buf.st_mode |= S_IRUSR; if ( !(win_attr_data.dwFileAttributes & FILE_ATTRIBUTE_READONLY) ) { stat_buf.st_mode |= S_IWUSR; } } else { //std::cerr << Fs::MSC_VER_GetLastErrorString("fs.open") << std::endl; } } } else { //std::cerr << Fs::MSC_VER_GetLastErrorString("Fs__stat") << std::endl; rv = GetLastError(); errorMsg = Fs::MSC_VER_GetLastErrorString("Fs__stat"); } #endif // @todo decode rv to errnum if (rv==0) { return Fs::clearErr(); } return Fs::setErr(APT_ERR,errorMsg,abortOnErr); } bool Fs__isreadable(struct stat& stat_buf) { int uid; int gid; #ifdef _WIN32 uid=0; gid=0; #else uid=getuid(); gid=getgid(); 
#endif if ( ((uid==stat_buf.st_uid) && (S_IRUSR&stat_buf.st_mode)) || ((gid==stat_buf.st_gid) && (S_IRGRP&stat_buf.st_mode)) || (S_IROTH&stat_buf.st_mode) ) { return true; } return false; } bool Fs__iswriteable(struct stat& stat_buf) { int uid; int gid; #ifdef _WIN32 uid=0; gid=0; #else uid=getuid(); gid=getgid(); #endif if ( ((uid==stat_buf.st_uid) && (S_IWUSR&stat_buf.st_mode)) || ((gid==stat_buf.st_gid) && (S_IWGRP&stat_buf.st_mode)) || (S_IWOTH&stat_buf.st_mode) ) { return true; } return false; } ////////// AptErr_t Fs::m_err_num; std::string Fs::m_err_msg; AptErr_t Fs::setErr(AptErr_t err_num,const std::string& err_msg,bool abortOnErr) { m_err_num=err_num; m_err_msg=err_msg; return m_err_num; } AptErr_t Fs::getErrNum() { return m_err_num; } std::string Fs::getErrMsg() { return m_err_msg; } AptErr_t Fs::clearErr() { m_err_num=APT_OK; m_err_msg=""; return m_err_num; } /// translate AptErr_t Fs::setErrFromErrno(const std::string& path,bool abortOnErr) { return setErrFromErrno(path,errno,abortOnErr); } /// translate AptErr_t Fs::setErrFromErrno(const std::string& path,int sys_errno,bool abortOnErr) { switch (sys_errno) { case 0: return clearErr(); case EEXIST: return setErr(APT_ERR_EXISTS,"exists: "+FS_QUOTE_PATH(path),abortOnErr); break; case EACCES: return setErr(APT_ERR_ACCESS,"access: "+FS_QUOTE_PATH(path),abortOnErr); break; default: return setErr(APT_ERR,"generic error: "+FS_QUOTE_PATH(path),abortOnErr); break; } return setErr(APT_ERR,"generic error: "+FS_QUOTE_PATH(path),abortOnErr); } ////////// std::string Fs::pathSep() { return std::string("/"); } std::string Fs::osPathSep( const std::string & path ) { // dont use FSPATH_PATHSEP here as these should be literal strings. #ifdef _WIN32 if ( path.empty() ) { return std::string("\\"); } return Fs::windowifyPath(path); #else if ( path.empty() ) { return std::string("/"); } return Fs::unixifyPath(path); #endif } ////////// // Dont use FSPATH_PATHSEP for these two methods // as they should be literial chars. 
AptErr_t Fs::normalizePathInPlace(std::string& str) { return unixifyPathInPlace(str); } std::string Fs::normalizePath(const std::string& str) { std::string tmp=str; normalizePathInPlace(tmp); return tmp; } // this is also used to unixify the string-path to our internal conventions. AptErr_t Fs::unixifyPathInPlace(std::string& str) { // fix the slashes to go the correct way. std::replace(str.begin(),str.end(),'\\','/'); return APT_OK; } std::string Fs::unixifyPath(const std::string& str) { std::string tmp=str; unixifyPathInPlace(tmp); return tmp; } /// do what needs to be done to windowify a string. AptErr_t Fs::windowifyPathInPlace(std::string& str) { // flip the slashes. std::replace(str.begin(),str.end(),'/','\\'); return APT_OK; } std::string Fs::windowifyPath(const std::string& str) { std::string tmp=str; windowifyPathInPlace(tmp); return tmp; } ////////// std::string Fs::join(const std::string& arg1, const std::string& arg2) { if (arg1=="") { return normalizePath(arg2); } if (arg2=="") { return normalizePath(arg1); } if (arg1=="/") { return "/"+normalizePath(arg2); } return normalizePath(arg1+"/"+arg2); } /// @todo could be changed not to norm std::string Fs::join(const std::string& arg1, const std::string& arg2, const std::string& arg3) { return join(join(arg1,arg2),arg3); } std::string Fs::join(const std::string& arg1, const std::string& arg2, const std::string& arg3, const std::string& arg4) { return join(join(join(arg1,arg2),arg3),arg4); } std::string Fs::join(std::vector& args) { std::string tmp_str; for (unsigned int i=0;i0)&&(path.compare(path.size()-1,1,FS_PATHSEP)==0)) { return true; } return false; } std::string Fs::addTrailingSlash(const std::string& path_in) { std::string path_out=path_in; if ((path_in!="")&&(!hasTrailingSlash(path_out))) { path_out+=FS_PATHSEP; } return path_out; } std::string Fs::trimTrailingSlash(const std::string& path_in) { std::string path_out=path_in; if ((path_out!=FS_PATHSEP)&&(hasTrailingSlash(path_out))) { 
path_out.erase(path_out.size()-1); } return path_out; } ////////// std::string Fs::basename(const std::string& path) { // unixify the string. std::string tmp_path=normalizePath(path); // should this be an error? // no as it makes taking the basename of "" fail. // we have lots of "foo=Fs::basename(getOpt("foo"))" where foo // is unset. //if (tmp_path=="") { //APT_ERR_ABORT("cant take the basename of '"+tmp_path+"'"); //} // std::string::size_type pos; pos=tmp_path.rfind(FS_PATHSEP); if (pos==std::string::npos) { return tmp_path; } tmp_path=tmp_path.substr(pos+1); //printf("Fs::basename('%s')=>'%s'\n",path.c_str(),tmp_path.c_str()); return tmp_path; } std::vector Fs::basename(const std::vector& paths) { std::vector out; out.reserve(paths.size()); for (unsigned int i=0;i'%s'\n",path.c_str(),tmp_path.c_str()); return tmp_path; } std::string Fs::extname(const std::string& path) { // unixify the string. std::string tmp_path=basename(path); // std::string::size_type pos; pos=tmp_path.find(FS_EXTSEP); if (pos==std::string::npos) { return ""; } tmp_path=tmp_path.substr(pos); //printf("Fs::extname('%s')=>'%s'\n",path.c_str(),tmp_path.c_str()); return tmp_path; } std::string Fs::noextname(const std::string& path) { // unixify the string. std::string tmp_path=path; unixifyPathInPlace(tmp_path); // std::string::size_type slash_pos; slash_pos=tmp_path.rfind(FS_PATHSEP); if (slash_pos==std::string::npos) { slash_pos=0; } std::string::size_type ext_pos; ext_pos=tmp_path.find(FS_EXTSEP,slash_pos); if (ext_pos!=std::string::npos) { tmp_path.erase(ext_pos); } //printf("Fs::noextname('%s')=>'%s'\n",path.c_str(),tmp_path.c_str()); return tmp_path; } std::string Fs::noextname1(const std::string& path) { // unixify the string. 
std::string tmp_path=path; unixifyPathInPlace(tmp_path); // std::string::size_type slash_pos; slash_pos=tmp_path.rfind(FS_PATHSEP); if (slash_pos==std::string::npos) { slash_pos=0; } std::string::size_type ext_pos; ext_pos=tmp_path.rfind(FS_EXTSEP); if ((ext_pos!=std::string::npos)&&(ext_pos>slash_pos)) { tmp_path.erase(ext_pos); } //printf("Fs::noextname1('%s')=>'%s'\n",path.c_str(),tmp_path.c_str()); return tmp_path; } std::string Fs::noextnameN(const std::string& path, int cnt) { // unixify the string. std::string tmp_path=path; unixifyPathInPlace(tmp_path); // no change. if (cnt==0) { return tmp_path; } // std::string::size_type fn_start; fn_start=tmp_path.rfind(FS_PATHSEP); if (fn_start==std::string::npos) { fn_start=0; } else { fn_start+=1; // the first char after the "/" } std::string::size_type fn_end=tmp_path.size(); std::string::size_type tmp_pos=std::string::npos; for (int n=0;n'%s'\n",path.c_str(),cnt,tmp_path.c_str()); return tmp_path; } void Fs::splitPath(const std::string& path,std::string& drive,std::vector& parts) { std::string::size_type s_pos; std::string::size_type e_pos; // clear our outputs before doing work. drive=""; parts.clear(); clearErr(); std::string tmp_path=normalizePath(path); // split off the drive. ("C:") if ((tmp_path.size()>=2)&&(tmp_path[1]==':')) { drive=tmp_path.substr(0,2); tmp_path=tmp_path.substr(2); } else { drive=""; } if (tmp_path=="") { return; } s_pos=0; e_pos=0; while (true) { e_pos=tmp_path.find(FS_PATHSEP,s_pos); //printf(" %d-%d = '%s'\n",int(s_pos),int(e_pos),tmp_path.substr(s_pos,e_pos-s_pos).c_str()); if (e_pos==0) { // an abs path, push "/" as the first value. parts.push_back(FS_PATHSEP); } else if (e_pos==std::string::npos) { parts.push_back(tmp_path.substr(s_pos)); // and we are done. 
break; } else { parts.push_back(tmp_path.substr(s_pos,e_pos-s_pos)); } //printf(" %d-%d = '%s'\n",int(s_pos),int(e_pos),parts[parts.size()-1].c_str()); // advance s_pos=e_pos+1; if (s_pos>=tmp_path.size()) { break; } } } // @todo should have a switch to choose between "noextname1" and "noextname". std::vector Fs::changeDirAndExt(const std::string& new_dir, const std::vector& paths, const std::string& new_ext) { std::vector out; out.reserve(paths.size()); for(unsigned int i=0;i MAX_PATH DWORD rv=GetFullPathNameW(wsrcpath.c_str(),UNC_MAX_PATH_BUFSIZ, wdstpath_char, NULL); if (rv==0) { APT_ERR_ABORT("GetFullPathNameW failed."); } path = Util::toString( wdstpath_char ); // if there is something there, tack on the "\\?\" prefix. // so we and windows know this is a UNC path. if (path!="") { path="\\\\?\\"+path; } // return APT_OK; #endif } ////////// std::string Fs::convertCommandToUnc(const std::string& cmd) { // find the first sep std::string::size_type pos; pos=cmd.find(" "); // no sep? then no change. if (pos==std::string::npos) { return cmd; } // split the command into exe and args. std::string cmd_exe=cmd.substr(0,pos); std::string cmd_args=cmd.substr(pos); // change the command std::string cmd_exe_unc=Fs::convertToUncPath(cmd_exe); // put it back together. 
return cmd_exe_unc+cmd_args; } ////////// bool Fs::exists(const std::string& path) { int stat_rv; struct stat stat_buf; stat_rv = Fs__stat(path,stat_buf,stat_rv,false); if (stat_rv==0) { clearErr(); return true; } // setErrFromErrno(path,false); return false; } bool Fs::dirExists(const std::string& path) { int stat_rv; struct stat stat_buf; Fs__stat(path,stat_buf,stat_rv,false); if (stat_rv!=0) { setErrFromErrno(path,false); return false; } if (!(S_ISDIR(stat_buf.st_mode))) { setErr(APT_ERR,"dirExists(): not a dir",false); return false; } clearErr(); return true; } bool Fs::fileExists(const std::string& path) { int stat_rv; struct stat stat_buf; Fs__stat(path,stat_buf,stat_rv,false); if (stat_rv!=0) { setErrFromErrno(path,false); return false; } if (!S_ISREG(stat_buf.st_mode)) { setErr(APT_ERR,"fileExists(): not a file",false); return false; } clearErr(); return true; } bool Fs::isReadable(const std::string& path) { AptErr_t rv; return isReadable(path,rv); } bool Fs::isReadable(const std::string& path,AptErr_t& rv) { int stat_rv; struct stat stat_buf; Fs__stat(path,stat_buf,stat_rv,false); if (stat_rv!=APT_OK) { setErrFromErrno(path,false); return false; } return Fs__isreadable(stat_buf); } bool Fs::isWriteable(const std::string& path,AptErr_t& rv) { int stat_rv; struct stat stat_buf; Fs__stat(path,stat_buf,stat_rv,false); if (stat_rv!=APT_OK) { setErrFromErrno(path,false); return false; } return Fs__iswriteable(stat_buf); } bool Fs::isWriteable(const std::string& path) { AptErr_t rv; return isWriteable(path,rv); } bool Fs::isReadableDir(const std::string& path) { AptErr_t rv; return isReadableDir(path,rv); } bool Fs::isReadableDir(const std::string& path,AptErr_t& rv) { int stat_rv; struct stat stat_buf; Fs__stat(path,stat_buf,stat_rv,false); if (stat_rv!=APT_OK) { setErrFromErrno(path,false); return false; } if (!(S_ISDIR(stat_buf.st_mode))||(Fs__isreadable(stat_buf)==false)) { return false; } return true; } bool Fs::isWriteableDir(const std::string& path) { 
AptErr_t rv; return isWriteableDir(path,rv); } bool Fs::isWriteableDir(const std::string& path,AptErr_t& rv) { int stat_rv; struct stat stat_buf; Fs__stat(path,stat_buf,stat_rv,false); // no path. if (stat_rv!=0) { setErrFromErrno(path,false); return false; } // we would like to just write this: // return S_ISDIR(stat_buf.st_mode)&&Fs__iswriteable(stat_buf); // but we cant on windows. Why? // Sometimes windows has it marked read only, but that is just // shorthand for "special". // See http://jira.ev.affymetrix.com:8080/browse/APT-380 // this isnt a dir, so we fail. if (S_ISDIR(stat_buf.st_mode)==0) { return false; } // os says it is writeable, so we succeed if (Fs__iswriteable(stat_buf)==true) { return true; } // so now it is a dir, but os says it is read-only... #ifdef _WIN32 // however... on windows we check to see if we can write to it anyways. /// @todo generate a better test name. std::string probe_path=Fs::join(path,"Fs-Test-2009323948.tmp"); if (fileExists(probe_path)) { APT_ERR_ABORT("Probefile "+FS_QUOTE_PATH(probe_path)+" exists already. It shouldnt."); } // try and create the file. Fs::touch(probe_path,false); // did it work? if (fileExists(probe_path)) { // yep get rid of it Fs::rm(probe_path); // and say we did. return true; } #endif // unix doesnt lie... we cant write, say we cant. return false; } ////////// /// @brief Does the file exist and start with the sequence of bytes. /// @param path path to the file to test. /// @param bytes vector of ints with a terminal "0". /// @return true if the file does exist and have the byte sequence. 
static bool file_starts_with(const std::string& path,const int* magic_bytes) { std::fstream fs; Fs::aptOpen(fs, path, std::ios::in|std::ios::binary); if (!fs.good()) { return false; } for (int i=0;magic_bytes[i]!=0;i++) { if (fs.get()!=magic_bytes[i]) { fs.close(); return false; } } fs.close(); return true; } bool Fs::isHdf5File(const std::string& path) { int magic_bytes[]={ 0x89, 0x48, 0x44, 0x46, 0x0d, 0x00 }; // H D F return file_starts_with(path,magic_bytes); } bool Fs::isCalvinFile(const std::string& path) { int magic_bytes[]={ 0x3b, 0x01, 0x00 }; // the short calvin magic. return file_starts_with(path,magic_bytes); } bool Fs::isBinaryFile(const std::string& path) { std::fstream fs; Fs::aptOpen(fs, path, std::ios::in|std::ios::binary); if (!fs.good()) { return false; } int cnt_txt=0; int cnt_bin=0; for (int i=0;i<1024;i++) { int c=fs.get(); // the end? if (c==-1) { break; } // text? if (((c>=0x20)&&(c<=0x7E)) // " " -- "~" || (c==0x09) // "\t" || (c==0x0A) // "\n" || (c==0x0D) // "\r" ) { cnt_txt++; continue; } cnt_bin++; } // maybe compute some ratio? double cnt_total=cnt_bin+cnt_txt; if (cnt_total==0) { return false; } // for now, if there are any, we take the file as being binary. if (cnt_bin>0) { return true; } // not binary. (Which is not the same as text.) return false; } ////////// int64_t Fs::fileSize(const std::string& path,bool abortOnErr) { AptErr_t errnum; int64_t file_size=fileSize(path,errnum); // if ((errnum!=APT_OK)&&(abortOnErr==true)) { setErr(getErrNum(),getErrMsg(),abortOnErr); } return file_size; } int64_t Fs::fileSize(const std::string& path,AptErr_t& errnum) { int stat_rv; struct stat stat_buf; int64_t file_size=-1; if ((errnum=Fs__stat(path,stat_buf,stat_rv,false))==APT_OK) { file_size=stat_buf.st_size; } return file_size; } ////////// AptErr_t Fs::chmodBasic(const std::string& path,int mode,bool abortOnErr) { int rv; #ifdef _WIN32 // http://technet.microsoft.com/en-us/library/bb463216.aspx // This only works at the user level. 
// http://msdn.microsoft.com/en-us/library/1z319a54.aspx // When both constants are given, they are joined with the bitwise OR operator ( | ). // If write permission is not given, the file is read-only. Note that all files are always readable; // it is not possible to give write-only permission. Thus, the modes _S_IWRITE and _S_IREAD | _S_IWRITE are equivalent. std::wstring unc_path=Fs::UncW(path); rv=POSIX_CHMOD(unc_path.c_str(),mode); #else rv=POSIX_CHMOD(path.c_str(),mode); #endif if (rv!=0) { return setErr(APT_ERR,"chmod failed. "+FS_ERRNO_STR(),abortOnErr); } return clearErr(); } ////////// AptErr_t Fs::listDir(const std::string& path,std::vector& names,bool abortOnErr) { clearErr(); names.resize(0); if (path=="") { setErr(APT_ERR_NOTEXISTS,"blank path.",abortOnErr); } #ifdef _WIN32 // We need the "*" to match files. // This isnt just opening the dir and reading the contents, // but rather "what matches?". std::wstring unc_path=Fs::UncW(join(path,"*")); // WIN32_FIND_DATAW ffd; HANDLE hFind = INVALID_HANDLE_VALUE; DWORD rv=0; hFind = FindFirstFileW(unc_path.c_str(), &ffd); if (INVALID_HANDLE_VALUE == hFind) { return setErr(APT_ERR_NOTEXISTS,Util::toString(unc_path),abortOnErr); } while (true) { names.push_back(Util::toString(ffd.cFileName)); if (FindNextFileW(hFind, &ffd)==0) { break; } } FindClose(hFind); #else DIR *d_p; struct dirent* de_p; d_p=opendir(path.c_str()); if (d_p==NULL) { return setErr(APT_ERR,"osListDir failed. "+FS_ERRNO_STR(),abortOnErr); } //int cnt=0; std::string tmp_name; while ((de_p=readdir(d_p))!=NULL) { //printf(" %2d: '%s'\n",cnt++,de_p->d_name); // tmp_name=de_p->d_name; if ((tmp_name==".")||(tmp_name=="..")) { continue; } // add it names.push_back(tmp_name); } closedir(d_p); #endif // return them in order. 
sort(names.begin(),names.end()); return clearErr(); } ////////// AptErr_t Fs::mkdir(const std::string& path, //bool errIfExists, bool abortOnErr) { int rv; #ifdef _WIN32 std::wstring unc_path=Fs::UncW(path); rv=POSIX_MKDIR(unc_path.c_str()); #else rv=POSIX_MKDIR(path.c_str(), 0777); #endif // if (rv!=0) { return setErrFromErrno(path,abortOnErr); } return clearErr(); } AptErr_t Fs::mkdirPath(const std::string& path, bool abortOnErr) { clearErr(); std::string drive; std::vector parts; splitPath(path,drive,parts); if (parts.size()==0) { return APT_OK; } std::string tmp_path; std::string tmp_drive_path; for (size_t i=0;i tmp_parts; splitPath(path,drive,tmp_parts); std::string tmp_path; std::string tmp_drive_path; while (tmp_parts.size()>0) { tmp_path=Fs::join(tmp_parts); tmp_drive_path=drive+tmp_path; Fs::rmdir(tmp_drive_path,abortOnErr); if (Fs::dirExists(tmp_drive_path)) { return setErr(APT_ERR,tmp_drive_path,abortOnErr); } tmp_parts.pop_back(); } return clearErr(); } ////////// AptErr_t Fs::rm_rf(const std::string& path,bool abortOnErr) { // try and protect against trival mistakes. if ((path=="/")||(path==".")||(path=="")) { return setErr(APT_ERR,"wont attempt to remove "+FS_QUOTE_PATH(path),abortOnErr); } if (fileExists(path)) { return rm(path); } if (dirExists(path)) { std::vector names; listDir(path,names); for (unsigned int i=0;i::open(const wchar_t*, // std::ios_base::openmode&) may not be defined. The problem is // the type of argument #1; it is defined for 'const char*' but // not 'const wchar_t*'. So, instead of calling Fs::UncW(filename) // we call Fs::Unc(filename), which return char*. I'm not 100% // sure what the catch is, but it may be that some filenames that // require UTF-16 will not work. 
/HB 2012-08-29 std::string wtemp = Fs::Unc(filename) ; fs.open(wtemp.c_str(), iomode); #else fs.open(filename.c_str(), iomode); #endif } void Fs::aptOpen( std::ofstream & ofs, const std::string & filename, std::ios_base::openmode iomode) { #ifdef _WIN32 // PATCH: Apparently std::basic_ofstream::open(const wchar_t*, // std::ios_base::openmode&) may not be defined. The problem is // the type of argument #1; it is defined for 'const char*' but // not 'const wchar_t*'. So, instead of calling Fs::UncW(filename) // we call Fs::Unc(filename), which return char*. I'm not 100% // sure what the catch is, but it may be that some filenames that // require UTF-16 will not work. /HB 2012-08-29 std::string wtemp = Fs::Unc(filename); ofs.open(wtemp.c_str(), iomode); #else ofs.open(filename.c_str(), iomode); #endif } void Fs::aptOpen( std::ifstream & ifs, const std::string & filename, std::ios_base::openmode iomode) { #ifdef _WIN32 // PATCH: Apparently std::basic_ofstream::open(const wchar_t*, // std::ios_base::openmode&) may not be defined. The problem is // the type of argument #1; it is defined for 'const char*' but // not 'const wchar_t*'. So, instead of calling Fs::UncW(filename) // we call Fs::Unc(filename), which return char*. I'm not 100% // sure what the catch is, but it may be that some filenames that // require UTF-16 will not work. 
/HB 2012-08-29 std::string wtemp = Fs::Unc(filename); ifs.open(wtemp.c_str(), iomode); #else ifs.open(filename.c_str(), iomode); #endif } int Fs::aptOpen( const std::string & pathname, int flags ) { #ifdef _WIN32 std::wstring wtemp = Fs::UncW(pathname); return _wopen(wtemp.c_str(), flags); #else return open(pathname.c_str(), flags); #endif } int Fs::aptOpen( const std::string & pathname, int flags, int mode ) { #ifdef _WIN32 std::wstring wtemp = Fs::UncW(pathname); return _wopen(wtemp.c_str(), flags, mode); #else return open(pathname.c_str(), flags, mode); #endif } std::string Fs::findLibFile(const std::string &fileName, const std::string &searchPath){ if(fileName == "") { // empty string is not valid file name. just return it back return fileName; } else if(Fs::fileExists(fileName)) { // file exists as already specified return fileName; } else { // Now lets search for the file std::vector searchPathVec; if(searchPath != "") { // For path separator use ';' on windows. // Use ':' or ';' on unix. #ifdef _WIN32 Util::chopString(searchPath,';',searchPathVec); #else Util::chopString(searchPath,":;",searchPathVec); #endif } else { char *sp = getenv("AFFX_ANALYSIS_FILES_PATH"); if(sp == NULL) { return fileName; } else { Util::chopString(sp,':',searchPathVec); } } for(int i=0; i< searchPathVec.size(); i++) { if(Fs::fileExists(Fs::join(searchPathVec[i] , fileName))) return Fs::join(searchPathVec[i] , fileName); } } // did not find anything, so just return what we started with return fileName; } /** * Open an ofstream for writing to. Abort if can't open * for some reason. * @param out - stream to be opened. * @param fileName - name of file to be opened. 
*/ void Fs::mustOpenToWrite(std::ofstream &out, const std::string &fileName) { assert(!fileName.empty()); Fs::aptOpen(out, fileName); if(!out.is_open() || !out.good()) { APT_ERR_ABORT("Couldn't open file: " +FS_QUOTE_PATH(fileName) + " to write."); } // Set to throw an exception if something bad happens rather than silently fail. out.exceptions(std::ofstream::eofbit | std::ofstream::failbit | std::ofstream::badbit ); } /** * Close an output stream making sure that it is ok before doing so. * @param out - stream to be closed. */ void Fs::carefulClose(std::ofstream &out) { // If file is open, do some checks to make sure that it was successful... if(out.is_open()) { if(out.bad()) { APT_ERR_ABORT("Fs::carefulClose() - ofstream bad."); } } out.close(); } /** * Close an output stream making sure that it is ok before doing so. * @param out - stream to be closed. */ void Fs::carefulClose(std::fstream &out) { // If file is open, do some checks to make sure that it was successful... if(out.is_open()) { if(out.bad()) { APT_ERR_ABORT("Fs::carefulClose() - ofstream bad."); } } out.close(); } /** * Return true on success. False otherwise * @param in - file to copy * @param out - name of the new file */ bool Fs::fileCopy(const std::string &in, const std::string &out, bool throwOnError) { ///@todo there is probably a better way to copy files and check for errors bool success = true; std::ifstream is; std::ofstream os; std::string iName = in; std::string oName = out; Fs::aptOpen(is, in, std::ios::binary); Fs::aptOpen(os, out, std::ios::binary); if(!is.good() || !os.good()) success = false; os << is.rdbuf(); if(!is.good() || !os.good()) success = false; is.close(); os.close(); if(!is.good() || !os.good()) success = false; if(throwOnError && !success) APT_ERR_ABORT("Unable to copy file '" + iName + "' to '" + oName + "'"); return success; } /** * Not intended to direct use. Only indirectly via Fs::fileRename() * We make multiple attempts to get around read lock issues. 
*/ static bool _uncheckedFileRename(const std::string &in, const std::string &out, int tries, int sec) { tries--; sec *= 3; bool success = true; #ifdef _WIN32 success = (MoveFileExW(Fs::convertToUncPathW(in).c_str(), Fs::convertToUncPathW(out).c_str(), MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH) != 0); #else success = (rename(in.c_str(), out.c_str()) == 0); #endif if(!success && (tries > 0)) { #ifdef _WIN32 Sleep(sec); #else sleep(sec); #endif return _uncheckedFileRename(in, out, tries, sec); } return success; } /** * Return true on success. False otherwise * @param in - file to move * @param out - name of the new file */ bool Fs::fileRename(const std::string &in, const std::string &out, bool throwOnError) { bool success = true; success = _uncheckedFileRename(in,out,4,10); if(throwOnError && !success) APT_ERR_ABORT("Unable to rename file '" + in + "' to '" + out + "'"); return success; } affxparser/src/fusion/util/Fs.h0000644000175200017520000004157214516003651017537 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // affy/sdk/util/Fs.h --- // #ifndef _UTIL_FS_H_ #define _UTIL_FS_H_ // #include "portability/affy-base-types.h" #include "util/AptErrno.h" #include "util/Util.h" // #include #include #include #include /// @file util/Fs.h /// @brief A class for working with the Os FileSystem. // We are going to use unix conventions internally // and translate them to the os requirements only when needed. // so we define these here. #define FS_PATHSEP "/" //#define FSPATH_INTERNAL_PATHSEP ":" #define FS_EXTSEP "." #define FS_UNC_SHORTLEN (200) /// Adds quotes to the path. #define FS_QUOTE_PATH(_path) std::string("'"+std::string(_path)+"'") /** * @class Fs * @brief An APT interface to the OS file handling utilities. * "Fs" is the simple 'paths as strings' library. * * APT should keep all paths in normalized (unix) format. * Fs.cpp keeps all paths as "/". "\"s are converted to "/"s automaticly. * * This library converts the pathnames to windows as and when needed. * The programmer (you) should always use unix style paths. * * The paths are filesystem paths. * This is different than FsPath which is a richer represenation of * the location of a dataset. * (IE: No internal tablenames or groupnames.) * * All the methods are static to the class. * The only state is the class state of the error number and message. * (Just like the system errno.) * * The last param of the methods is the optional "abortOnErr" * which defaults to "true". * When "true" an error during the call * results in and APT_ERR_ABORT. * When "false" an error code is returned and the caller can * deal with it as they see fit. * * The "InPlace" functions are like the normal ones, but change the strings * Rather than returning a new string. 
* * Note: The convertToUncPath will not convert an already converted path. * Thus calling it twice on a path should be safe. * We want it to be this way as we dont know how many times * this method will be called on a path. * Note: Many of the file libraries will call "convertToUncPath" for you. * All you need to do is pass them a normalized path. * file/TsvFile, File5 and the SQLite io code does do this; * The legacy file IO does not. * * Note: All APIs are Windows 32KB path size compliant. This requires * the internal use of std::wstring any time a file name is * used. However the std::wstring implies more than just * a path longer than MAX_PATH (260) bytes in length. * The std::wstring also implies: * 1.) Unicode. APT code is NOT Unicode compliant. * Therefore APT code should always use * std::string APIs, except when providing * someone additional path, std::string API * not provided here. Do not use std::wstring * for application path names as this may * allow Unicode characters to creep into paths * that have ASCII encoding. File names in APT * are encoded with application information * and parsing of file names in APT is not done * with Unicode parsing. Do not use std::wstring * for file names and paths. * 2.) Universal Naming Convention, or UNC. * Unfortunately UNC is cognitive of Unicode. * They do not refer to the same thing. * Microsoft "Universal" means Microsoft * specific. Specifically network paths * must start with "\\?\" and be std::wstring * for ALL paths, even those shorter than MAX_PATH. * All UNC paths then are also Unicode. * For convenience Fs::Unc API will return * the std::string UNC version so inspection * of the full path being used. Given APT * is NOT Unicode compliant then the Fs::Unc * returns std::string full path for ease of use. * * * */ // Maybe make this a namespace, not a class? class Fs { private: /// @brief The user shouldnt make one of these objects. /// Make this private so they cant.
Fs(); public: // @todo: Make these thread local? // A mutex wouldnt solve it, as the state shouldnt be shared between threads. // For now, we dont worry about threading. /// The error code of the last error. static AptErr_t m_err_num; /// The error message of the last error. static std::string m_err_msg; /// @brief Sets err num and message. Possibly calls Err::errAbort /// @param err_num /// @param err_msg /// @param abortOnErr When true, an error calls Err::errAbort. /// @return An error code. static AptErr_t setErr(AptErr_t err_num,const std::string& err_msg,bool abortOnErr); /// @brief The current error number static AptErr_t getErrNum(); /// @brief The current error string (Should include the pathname.) static std::string getErrMsg(); /// @brief Clears the error num and message /// @return always APT_OK static AptErr_t clearErr(); /// @brief /// @param path /// @param abortOnErr /// @return static AptErr_t setErrFromErrno(const std::string& path,bool abortOnErr); /// @brief /// @param path /// @param sys_errno /// @param abortOnErr /// @return static AptErr_t setErrFromErrno(const std::string& path,int sys_errno,bool abortOnErr); /// @brief Get the internal path seperator. /// @return Always "/" static std::string pathSep(); /// @brief Get the path seperator for the OS or replace path separators for input string. /// @return Could be "\" , "/" or the translated string if one is passed in. static std::string osPathSep( const std::string& path = ""); /// clean up the path. 
/// @brief /// @param path /// @return static std::string normalizePath(const std::string& path); static AptErr_t normalizePathInPlace(std::string& path); /// turn the path into a windows path string /// @brief /// @param path /// @return static std::string windowifyPath(const std::string& path); static AptErr_t windowifyPathInPlace(std::string& path); /// turn the path into a normal (unix) path string /// @brief /// @param path /// @return static std::string unixifyPath(const std::string& path); static AptErr_t unixifyPathInPlace(std::string& path); /// join strings with the path sep. /// while we would want people to keep the path with FsPath, /// we should face up to that there will be string pasting going on and /// make it less ugly. /// Fs::join("foo","bar") => "foo/bar" static std::string join(const std::string& arg1, const std::string& arg2); static std::string join(const std::string& arg1, const std::string& arg2, const std::string& arg3); static std::string join(const std::string& arg1, const std::string& arg2, const std::string& arg3, const std::string& arg4); static std::string join(std::vector& args); // static bool hasTrailingSlash(const std::string& path_in); static std::string addTrailingSlash(const std::string& path_in); static std::string trimTrailingSlash(const std::string& path_in); /// @brief like the unix function of the same name. /// @param path /// @return the basename of the path (abc/def/foo.bar => foo.bar) static std::string basename(const std::string& path); /// @brief Apply basename to a vector /// @param paths /// @return new vector of basenames. static std::vector basename(const std::vector& paths); /// @brief just the dirname (will return "." or "/" as well.) /// @param path /// @return static std::string dirname(const std::string& path); /// return the stuff after the "." including the "." ("foo.bar" => ".bar") // that way you dont have to worry about adding the "." later. 
// this preserves the invariant of: // path==join(dirname(path),basename(path)+extname(path)) static std::string extname(const std::string& path); /// chop off ALL the exts. ("path/foo.a.b.c" => "path/foo") static std::string noextname(const std::string& path); /// chop off ONE of the exts. ("path/foo.a.b.c" => "path/foo.a.b") static std::string noextname1(const std::string& path); /// chop off N of the exts ("path/foo.a.b.c",2 => "path/foo.a") static std::string noextnameN(const std::string& path,int cnt); /// static void splitPath(const std::string& path,std::string& drive, std::vector& parts); /// replace the dir and ext for all these filenames /// For example a vector of chip files for output. /// vec[0]="abc/foo.cel" => "outputdir/foo.chp" /// vec[1]="def/bar.cel" => "outputdir/bar.chp" /// vec[2]="def/baz" => "outputdir/baz.chp" static std::vector changeDirAndExt(const std::string& new_dir, const std::vector& paths, const std::string& new_ext); /// static std::string convertToUncPath(const std::string& path); static std::wstring convertToUncPathW(const std::string& path) {return Util::toWString(Fs::convertToUncPath(path,-1)); }; static std::string convertToUncPath(const std::string& path,int uncshortlen); static std::wstring convertToUncPathW(const std::string& path,int uncshortlen) { return Util::toWString( Fs::convertToUncPath( path, uncshortlen )); }; /// Short name for convertToUncPath. static std::string Unc(const std::string& path) { return Fs::convertToUncPath(path); }; static std::wstring UncW(const std::string& path) { return Fs::convertToUncPathW(path); }; static AptErr_t convertToUncPathInPlace(std::string& path); static AptErr_t convertToUncPathInPlace(std::string& path,int uncshortlen); /// convert only the first component of the command to unc. /// return the command string. static std::string convertCommandToUnc(const std::string& cmd); ///// /// @brief Change the mode of a dir or file. 
/// This is chmodBasic because on Windows only the write bit for the user /// can be set. There is no notion of group and other. /// Files are always readable. /// @param path /// @param mode /// @param abortOnErr /// @return static AptErr_t chmodBasic(const std::string& path,int mode,bool abortOnErr=true); ///// /// static bool exists(const std::string& path); static bool dirExists(const std::string& path); static bool fileExists(const std::string& path); static bool isReadable(const std::string& path); static bool isReadable(const std::string& path,AptErr_t& rv); static bool isReadableDir(const std::string& path); static bool isReadableDir(const std::string& path,AptErr_t& rv); static bool isWriteable(const std::string& path); static bool isWriteable(const std::string& path,AptErr_t& rv); static bool isWriteableDir(const std::string& path); static bool isWriteableDir(const std::string& path,AptErr_t& rv); // is it? static bool isCalvinFile(const std::string& path); static bool isHdf5File(const std::string& path); static bool isBinaryFile(const std::string& path); /// @brief This will make the dir if it isnt there. /// If it is there, that is ok as long as it is writeable. /// This will make a path like mkdirPath. /// @param path /// @param abortOnErr /// @return static AptErr_t ensureWriteableDirPath(const std::string& path,bool abortOnErr=true); /// @brief The size of the file /// @param path The path to /// @param abortOnErr If true, call Err::errAbort when a error occurs. /// @return the size of the file or -1 static int64_t fileSize(const std::string& path,bool abortOnErr=true); static int64_t fileSize(const std::string& path,AptErr_t& errnum); /// @brief /// @param path /// @param names /// @param abortOnErr /// @return static AptErr_t listDir(const std::string& path,std::vector& names,bool abortOnErr=true); /// @brief Make one directory. 
/// @param path /// @param abortOnErr /// @return static AptErr_t mkdir(const std::string& path,bool abortOnErr=true); /// @brief make all path of directories /// @param path /// @param abortOnErr /// @return static AptErr_t mkdirPath(const std::string& path,bool abortOnErr=true); /// An error if it doesnt exist static AptErr_t rm(const std::string& path,bool abortOnErr=true); /// An error if the rm fails. static AptErr_t rmIfExists(const std::string& path,bool abortOnErr=true); /// @brief Remove a directory /// @param path /// @param abortOnErr /// @return static AptErr_t rmdir(const std::string& path,bool abortOnErr=true); /// @brief /// @param path /// @param abortOnErr /// @return static AptErr_t rmdirPath(const std::string& path,bool abortOnErr=true); /// @brief /// @param path /// @param abortOnErr /// @return static AptErr_t rm_rf(const std::string& path,bool abortOnErr=true); /// @brief touchs a file (creates a zero length file if not exists.) /// @param path /// @param abortOnErr /// @return static AptErr_t touch(const std::string& path,bool abortOnErr=true); /// @brief /// @param path /// @param abortOnErr /// @return static int64_t getFreeDiskSpace(const std::string& path,bool abortOnErr=true); /// @brief Do these two paths refer to the same volume? /// @param path1 /// @param path2 /// @param rv /// @param abortOnErr /// @return True if they are on the same volume. 
static bool isSameVolume(const std::string& path1, const std::string& path2, AptErr_t& rv, bool abortOnErr=true); #ifdef _MSC_VER static std::string MSC_VER_GetLastErrorString( const std::string& whence = "" ); #endif static int aptOpen( const std::string & pathname, int flags = O_RDONLY); static int aptOpen( const std::string & pathname, int flags, int mode ); static void aptOpen( std::fstream & fs, const std::string & filename, std::ios_base::openmode iomode = std::ios_base::in | std::ios_base::out); static void aptOpen( std::ofstream & ofs, const std::string & filename, std::ios_base::openmode iomode = std::ios_base::out|std::ios_base::binary); static void aptOpen( std::ifstream & ifs, const std::string & filename, std::ios_base::openmode iomode = std::ios_base::in); /// Util carry overs. These need to be updated with abortOnError. /** * Search the affy library file path for the actual file name to open * @param fileName - the name of the file to find * @param searchPath - alternative search path */ static std::string findLibFile(const std::string &fileName, const std::string &searchPath = ""); //#endif /** * Open an ofstream for writing to. Abort if can't open for some reason. * @param out - stream to be opened. * @param fileName - name of file to be opened. */ static void mustOpenToWrite(std::ofstream &out, const std::string& fileName); /** * Close an output stream making sure that it is ok before doing so. * @param out - stream to be closed. */ static void carefulClose(std::ofstream &out); /** * Close an output stream making sure that it is ok before doing so. * @param out - stream to be closed. */ static void carefulClose(std::fstream &out); static bool fileCopy(const std::string &in, const std::string &out, bool throwOnError = true); /** * Return true on success. 
False otherwise * @param in - file to copy * @param out - name of the new file */ static bool fileRename(const std::string &in, const std::string &out, bool throwOnError = true); }; #endif // _UTIL_FS_H_ affxparser/src/fusion/util/FsPath.cpp0000644000175200017520000004047214516003651020705 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // affy/sdk/util/FsPath.cpp --- // // #include "util/FsPath.h" // #include "util/AptErrno.h" #include "util/Convert.h" #include "util/Err.h" #include "util/Fs.h" #include "util/Util.h" // #include #include #include #include // OS stuff for memInfo, getAvailableDiskSpace #ifdef __APPLE__ #include #include #endif #ifdef __linux__ #include #include #endif #ifdef __sun__ #include #include #endif ////////// int FsPath::m_ext2fmt_map_isloaded=0; std::map FsPath::m_ext2fmt_map; ////////// struct FsPath_Ext2fmt_t { const char* m_ext; FsPath::FileFmt_t m_fmt; }; /// @todo add more file extensions here. 
FsPath_Ext2fmt_t initial_ext2fmt[]= { {"tsv", FsPath::FILEFMT_TSVFILE }, {"txt", FsPath::FILEFMT_TSVFILE }, {"bb", FsPath::FILEFMT_TSVFILE }, // {"a5", FsPath::FILEFMT_FILE5 }, {"hdf5", FsPath::FILEFMT_FILE5 }, {"ref", FsPath::FILEFMT_FILE5 }, // {"chp", FsPath::FILEFMT_CALVIN }, {"cychp", FsPath::FILEFMT_CALVIN }, // end marker. {NULL,FsPath::FILEFMT_NONE} }; void FsPath::addExt2Fmt(const std::string& ext,FsPath::FileFmt_t fmt) { m_ext2fmt_map[ext]=fmt; } /// @todo handle case (e.g. 'A5' && 'a5' are equivalent FsPath::FileFmt_t FsPath::ext2Fmt(const std::string& ext) { if (m_ext2fmt_map_isloaded==0) { FsPath_Ext2fmt_t* p=initial_ext2fmt; while (p->m_ext!=NULL) { addExt2Fmt(p->m_ext,p->m_fmt); p++; } m_ext2fmt_map_isloaded=1; } std::map::iterator i; i=m_ext2fmt_map.find(ext); if (i==m_ext2fmt_map.end()) { return FsPath::FILEFMT_NONE; } return i->second; } ////////// #ifdef _MSC_VER #pragma warning(disable: 4996) #endif FsPath FsPath::getPathFor(const std::string& loc) { FsPath the_path; // @todo put windows code here. char* val=getenv(loc.c_str()); if (val==NULL) { return the_path; } // these locations are always dirs. 
if ((loc=="HOME") || (loc=="PWD") || (loc=="TMP")) { the_path.setDirName(val); } else { the_path.setPath(val); } return the_path; } ////////// FsPath::FsPath() { init(); } FsPath::FsPath(const FsPath& fspath) { copyFrom(fspath); } FsPath::FsPath(const std::string& path) { init(); setPath(path); } FsPath::FsPath(const std::string& dir,const std::string& name,const std::string& ext) { init(); m_dir_name=Fs::addTrailingSlash(dir); m_file_name=name; m_file_ext=ext; } // In "harley-speak" init is a bigger "clear()" void FsPath::init() { // m_err_doAbort=true; // clear(); } // void FsPath::clear() { m_original_path=""; // m_dir_drive=""; m_dir_name=""; m_file_name=""; m_file_ext=""; m_filefmt=FsPath::FILEFMT_AUTO; // m_internal_group_name=""; m_internal_table_name=""; } #define FSPATH_DUMP_D(_x) { printf(" %-30s = '%d'\n",#_x,_x); } #define FSPATH_DUMP_S(_x) { printf(" %-30s = '%s'\n",#_x,_x.c_str()); } void FsPath::dump() { printf("== FsPath(%p):\n",this); FSPATH_DUMP_S(m_original_path); FSPATH_DUMP_S(m_dir_name); FSPATH_DUMP_S(m_file_name); FSPATH_DUMP_S(m_file_ext); FSPATH_DUMP_D(m_filefmt); FSPATH_DUMP_S(m_internal_group_name); FSPATH_DUMP_S(m_internal_table_name); std::string str; str=asUnixPath(); printf(" As asUnixPath(): '%s'\n",str.c_str()); str=asString(); printf(" As asString(): '%s'\n",str.c_str()); } bool FsPath::empty() { if ((m_dir_name=="") && (m_file_name=="") && (m_file_ext=="") && (m_internal_group_name=="") && (m_internal_table_name=="")) { return true; } return false; } void FsPath::copyFrom(const FsPath& path) { // dont bother to do anything when self assigning. if (this==&path) { return; } // first opts... // m_opt_pathsep=path.m_opt_pathsep; // m_opt_internalsep=path.m_opt_internalsep; // m_opt_extsep=path.m_opt_extsep; // ... 
then values // m_original_path=path.m_original_path; m_dir_drive=path.m_dir_drive; m_dir_name=path.m_dir_name; m_file_name=path.m_file_name; m_file_ext=path.m_file_ext; m_filefmt=path.m_filefmt; m_internal_group_name=path.m_internal_group_name; m_internal_table_name=path.m_internal_table_name; } void FsPath::copyDirNameFrom(const FsPath& path) { m_dir_name=path.m_dir_name; } void FsPath::copyFileNameFrom(const FsPath& path) { m_file_name=path.m_file_name; m_file_ext=path.m_file_ext; m_filefmt=path.m_filefmt; } #define FSPATH_NEQ(_slot) { if (_slot!=path._slot) { return false; } } bool FsPath::equals(const FsPath& path) { // FSPATH_NEQ(m_original_path) => for debugging - we dont care about its value. FSPATH_NEQ(m_dir_name); FSPATH_NEQ(m_file_name); FSPATH_NEQ(m_file_ext); FSPATH_NEQ(m_internal_group_name); FSPATH_NEQ(m_internal_table_name); //m_filefmt => derived dont test. // return true; } void FsPath::setDirDrive(const std::string& val) { // dont run unixify string; This should just be a letter and colon: "C:" m_dir_drive=val; }; void FsPath::setDirName(const std::string& val) { std::string tmp=val; // convert it to our internal (unix) format. 
Fs::unixifyPathInPlace(tmp); // split off the drive if (tmp[1]==':') { m_dir_drive=tmp.substr(0,2); tmp.erase(0,2); } // assign with an optinal trailing slash m_dir_name=Fs::addTrailingSlash(tmp); } ////////// void FsPath::setErrAbort(bool abortOnErr) { m_err_doAbort=abortOnErr; } AptErr_t FsPath::getErrNum() const { return m_err_num; } AptErr_t FsPath::setErr(AptErr_t err_num,const std::string& err_msg) { m_err_num=err_num; m_err_msg=err_msg; if (m_err_num!=APT_OK) { // for debugging //printf("FsPath::setErr: err=%d msg='%s'\n",m_err_num,m_err_msg.c_str()); // if (m_err_doAbort==true) { Err::errAbort(err_msg); } } return m_err_num; } AptErr_t FsPath::setErrFromFs() { return setErr(Fs::getErrNum(),Fs::getErrMsg()); } AptErr_t FsPath::clearErr() { m_err_num=APT_OK; m_err_msg=""; return m_err_num; } ////////// AptErr_t FsPath::setPath(const FsPath& fspath) { this->copyFrom(fspath); return APT_OK; } AptErr_t FsPath::setPath(const std::string& dirname, const std::string& file_name, const std::string& file_ext) { m_dir_name=Fs::addTrailingSlash(dirname); m_file_name=file_name; m_file_ext=file_ext; return APT_OK; } // See class docs for examples of valid paths. // No escaped backslashed characters allowed AptErr_t FsPath::setPath(const std::string& path) { std::string tmp_os_path; std::string tmp_internal_path; std::string::size_type pos; clear(); // save it off the original path m_original_path=path; if (path=="") { return APT_OK; } // tmp gets chopped up as we work and path is const, so we need a copy. tmp_os_path=path; // convert any MS-like paths to normal paths. Fs::unixifyPathInPlace(tmp_os_path); // if there is a drive letter, pull it off first // note this is ambiguous with a single letter file name with an internal // group/table // "c:foo" if (tmp_os_path[1]==':') { m_dir_drive=tmp_os_path.substr(0,2); tmp_os_path.erase(0,2); } // The last ":" of the string seperates the internal name // ...../FILE.EXT:INTERNAL_GROUP/INTERNAL_TABLE. 
pos=tmp_os_path.rfind(FSPATH_INTERNAL_PATHSEP); // order is important, pull out the internal ":group/table" first. if (pos!=std::string::npos) { tmp_internal_path=tmp_os_path.substr(pos+1); tmp_os_path.erase(pos,tmp_os_path.size()); } // now we have two parts... // ...do the os part first... pos=tmp_os_path.rfind(FSPATH_PATHSEP); if (pos==std::string::npos) { // no dir parts. m_dir_name=""; m_file_name=tmp_os_path; } else { m_dir_name=Fs::addTrailingSlash(tmp_os_path.substr(0,pos+1)); if (pos FsPath::getDirNames() const { std::string drive; std::vector parts; Fs::splitPath(m_dir_name,drive,parts); return parts; } ////////// FsPath::FileFmt_t FsPath::fmtFromExt() const { return ext2Fmt(m_file_ext); } FsPath::FileFmt_t FsPath::getFileFmt() const { if (m_filefmt==FsPath::FILEFMT_AUTO) { return fmtFromExt(); } return m_filefmt; } std::string FsPath::getBaseName() const { std::string out; out=m_file_name; if (m_file_ext!="") { out+=FSPATH_EXTSEP+m_file_ext; } return out; } std::string FsPath::asUnixPath() const { std::string out; out=m_dir_name; // add a sep if needed. if ((out.size()>0)&&(out.compare(out.size()-1,1,FSPATH_PATHSEP)!=0)) { out+=FSPATH_PATHSEP; } // now tack on the name if there. out+=m_file_name; if (m_file_ext!="") { out+=FSPATH_EXTSEP+m_file_ext; } // add the "C:" if there if (m_dir_drive!="") { out=m_dir_drive+out; } // return out; } const char* FsPath::asUnixPathCstr() const { // put the string an a buffer. m_tmp_cstr=asUnixPath(); // return the "char*" for the user. return m_tmp_cstr.c_str(); } // read the following for why this is required: // http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx // "Naming Files, Paths, and Namespaces" // In short: paths longer that 260 chars cant be processed, unless // (a) they are absolute // (b) they start with "\\?\". 
// std::string FsPath::asUncPath() const { std::string tmp=asUnixPath(); #ifdef _MSC_VER Fs::convertToUncPathInPlace(tmp); #endif return tmp; } const char* FsPath::osUncPathCstr() const { // put the string an a buffer. m_tmp_cstr=asUncPath(); // return the "char*" for the user. return m_tmp_cstr.c_str(); } std::string FsPath::asString() const { std::string out; out=m_dir_drive+m_dir_name+m_file_name; // if (m_file_ext!="") { out+=FSPATH_EXTSEP+m_file_ext; } // if ((m_internal_group_name!="")||(m_internal_table_name!="")) { out+=FSPATH_INTERNAL_PATHSEP; if (m_internal_group_name!="") { out+=m_internal_group_name+FSPATH_PATHSEP; } if (m_internal_table_name!="") { out+=m_internal_table_name; } } // return out; } ////////// bool FsPath::isAbsolute() const { if (m_dir_name.find(FSPATH_PATHSEP)==0) { return true; } return false; } bool FsPath::isFileName() const { if ((m_file_name!="") || (m_file_ext!="")) { return true; } return false; } bool FsPath::isDirName() const { if ((m_dir_name!="") && (m_file_name=="") && (m_file_ext=="")) { return true; } return false; } ////////// bool FsPath::exists() const { return Fs::exists(asUnixPath()); } bool FsPath::dirExists() const { return Fs::dirExists(asUnixPath()); } bool FsPath::fileExists() const { return Fs::fileExists(asUnixPath()); } ////////// void FsPath::pushDir(const std::string& dir) { m_dir_name=Fs::addTrailingSlash(Fs::addTrailingSlash(m_dir_name)+dir); } bool FsPath::canPopDir() const { if ((m_dir_name==FSPATH_PATHSEP) || (m_dir_name==".") || (m_dir_name=="")) { return false; } return true; } std::string FsPath::popDir() { std::string popped; std::string::size_type pos; //dump(); //printf("popDir('%s')==",osPathCstr()); if (m_dir_name=="") { return ""; } // cant pop an abs path. if (m_dir_name==FSPATH_PATHSEP) { return FSPATH_PATHSEP; } // m_dir_name=Fs::trimTrailingSlash(m_dir_name); pos=m_dir_name.rfind(FSPATH_PATHSEP); // no "/" found, the entire thing. 
if (pos==std::string::npos) { popped=m_dir_name; m_dir_name=""; } else if (pos==0) { popped=m_dir_name.substr(pos+1); m_dir_name=FSPATH_PATHSEP; } else { popped=m_dir_name.substr(pos+1); m_dir_name.erase(pos,m_dir_name.size()-pos); } //printf("'%s'\n",popped.c_str()); return popped; } ////////// bool FsPath::isReadable() const { return Fs::isReadable(asUnixPath()); } bool FsPath::isWriteableDir() const { return Fs::isWriteableDir(asUnixPath()); } bool FsPath::isWriteableDir(AptErr_t& rv) const { return Fs::isWriteableDir(asUnixPath()); } AptErr_t FsPath::ensureWriteableDirPath() { // already writeable? if (isWriteableDir()) { return APT_OK; } // create the path. mkdirPath(false); // check that we can write to the last dir. if (isWriteableDir()==true) { return APT_OK; } return setErr(APT_ERR,"FsPath::ensureWriteableDir() failed."); } ////////// ////////// AptErr_t FsPath::chmod(int mode,bool errIfNotExists) { Fs::chmodBasic(asUnixPath(),mode); return setErrFromFs(); } AptErr_t FsPath::mkdir(bool errIfExists) { Fs::mkdir(asUnixPath()); return setErrFromFs(); } AptErr_t FsPath::mkdirPath(bool errIfExists) { std::vector dir_names=getDirNames(); FsPath tmp_path(*this); tmp_path.clear(); tmp_path.setErrAbort(false); clearErr(); for (unsigned int i=0;i& names) { return Fs::listDir(asUnixPath(),names); } ////////// AptErr_t FsPath::touch() { Fs::touch(asUnixPath()); return setErrFromFs(); } ////////// int64_t FsPath::getFreeDiskSpace() const { return Fs::getFreeDiskSpace(asUnixPath()); } ////////// bool FsPath::isSameVolume(const std::string& path2) { AptErr_t rv; return Fs::isSameVolume(asUnixPath(),path2,rv,m_err_doAbort); } bool FsPath::isSameVolume(const FsPath& path2) { AptErr_t rv; return Fs::isSameVolume(asUnixPath(),path2.asUnixPath(),rv,m_err_doAbort); } affxparser/src/fusion/util/FsPath.h0000644000175200017520000003154214516003651020350 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, 
Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // // sdk/apt2/dao/FsPath.h --- // // $Id: FsPath.h,v 1.2 2009-11-04 20:37:32 harley Exp $ // #ifndef _UTIL_FSPATH_H_ #define _UTIL_FSPATH_H_ // tell other files to use our methods for file handling // this is just while we clean things up. // #define APT_FORCE_USE_FSPATH 1 // #include "portability/affy-base-types.h" #include "util/AptErrno.h" // #include #include #include /// @file util/FsPath.h /// @brief A class for working with richer filepaths than just a string. // We are going to use unix conventions in the internal format // and translate them to the os requirements only as needed. // so we define these here. // They are not contitional on the platform. (unix & win32) #define FSPATH_PATHSEP "/" #define FSPATH_INTERNAL_PATHSEP ":" #define FSPATH_EXTSEP "." /** * @class FsPath * @brief A class for working with richer filepaths than just a string. * APT would like to be able to name where to read and write data. * when there was just one dataset per file the filename was sufficient to * to do this. The dataset name was simply mapped into the * file system. Now things are more complicated. A file * can contain several datasets. * * Currently this is handled by also passing in more options * to name the group and table. 
However this is done * different ways in APT. The goal of FsPath is to be * able to name these datasets and provide a convenient set of * methods to work with the names. * * FsPath keeps its names in separate pieces so they can be * worked on and recombined without parsing a string in the * code. The directory name, file name, internal group name * and tablename (dataset) are all kept in the one object. * This FsPath can be passed to the File libraries who can * extract what they need. * * The format of the FsPath is: * /os/path/name/filename.EXT:group/names/tablename * * (A) The path in the os. ("/os/path/name") * (B) The filename and extension. ("filename.EXT") * (C) The external/internal separator (":") * (D) The internal path. ("group/names") * file5 could have several parts. * calvin must have one and only one level of group names. * TsvFile can be extended to use the filesystem to have groups. * (E) The tablename/dataset name ("tablename") * * With all this we could then write: * * FsPath path("path/to/file.EXT:/group/table") * DaoDataSet dataset; * * dataset->open(path); * * and the EXT would do the magic for Calvin, File5 or TsvFile. * * Thinking about this some more, it might be best to break out * the operating system calls into a separate module which takes a string * or a FsPath object to do things with. * * Note: according to APT-380, "Desktop" and other special windows dirs * might be flagged as write-only, but actually be writeable. * If this is really the case then "isWriteable" should be a two step process: * if (is_writeable()) { return true; } * if (is_windows()) { if (try_and_make_a_dir_works()) { return true; } } * return false. * * Ideas: * * Add "void*" to the actual file being opened for reference. * and marker for what type is being held. * * * To migrate current functionality: * - UncPath() here * - BaseName() * - FileRoot() * - Openers for raw streams.
* - IsWritable() or equivalent */ class FsPath { public: /// Codes for the different formats APT might expect files to be in. enum FileFmt_t { FILEFMT_NONE = 0, FILEFMT_ANY, FILEFMT_AUTO, // FILEFMT_CALVIN, FILEFMT_FILE5, // we dont read straight text, but APT might know it is a text file FILEFMT_TEXT, FILEFMT_TSVFILE, }; /// get a FsPath for a named location. (HOME, TMP, CWD) /// presently read from the process environment. static FsPath getPathFor(const std::string& loc); /// add a mapping from ".foo" to FILEFMT_FOO static void addExt2Fmt(const std::string& ext,FsPath::FileFmt_t fmt); /// convert the extention to the format based on the extension. static FileFmt_t ext2Fmt(const std::string& ext); /// new and empty path FsPath(); /// a new copy of fspath FsPath(const FsPath& fspath); /// new fspath and call setPath. FsPath(const std::string& path); /// new fspath and the user tells us the three parts. FsPath(const std::string& dir,const std::string& name,const std::string& ext); /// clear and reset the defaults. void init(); /// clear the data only. void clear(); /// dump the FsPath to stdout. void dump(); /// True if it does not have a value. bool empty(); /// when set to false, dont throw an exception on an error. void setErrAbort(bool abortOnErr); /// the error value. AptErr_t getErrNum() const; /// clear the error; always returns APT_OK AptErr_t clearErr(); /// sets the error value and throws if abortOnErr==true AptErr_t setErr(AptErr_t err_num,const std::string& err_msg); /// copies the error code from the current APT Fs error code AptErr_t setErrFromFs(); /// parses the string into its parts. AptErr_t setPath(const std::string& pathname); /// copies the value AptErr_t setPath(const FsPath& fspath); /// sets the three components at once. (no parsing.) AptErr_t setPath(const std::string& dirname,const std::string& file_name,const std::string& file_ext); /// assignment. 
const FsPath& operator=(const FsPath& path) { copyFrom(path); return *this; } /// assignment again. void copyFrom(const FsPath& path); /// just copy the directory parts. void copyDirNameFrom(const FsPath& path); /// just copy the filename parts. void copyFileNameFrom(const FsPath& path); // @todo // void copyInternalNameFrom(const FsPath& path); /// do the names match? bool operator==(const FsPath& path) { return equals(path); } /// do the names match? bool equals(const FsPath& path); /// set the option of pathsep. // void setPathSep(const std::string& val) { m_opt_pathsep=val; }; /// The value for windows. ("C:") void setDirDrive(const std::string& val); /// treat the entire path as a dir name. void setDirName(const std::string& val); // @todo // setDirName(const std::vector dirnames); /// sets the file name (not the dirname or ext) void setFileName(const std::string& val) { m_file_name=val; }; /// sets the file and ext (not the dirname) void setFileNameExt(const std::string& file,const std::string& ext) { m_file_name=file; m_file_ext=ext; }; /// sets the extension (".foo") void setFileExt(const std::string& val) { m_file_ext=val; }; /// sets the internal group name (like for file5) void setInternalGroupName(const std::string& val) { m_internal_group_name=val; }; /// sets the internal table name (like for file5) void setInternalTableName(const std::string& val) { m_internal_table_name=val; }; /// the orginal path given to setPath (For debugging) std::string getOriginalPath() const { return m_original_path; }; /// the windows drive std::string getDirDrive() const { return m_dir_drive; }; /// the directory path (no slash) std::string getDirName() const; /// the directory path (with slash) std::string getDirNameSlash() const; /// the directory path as a list of components. 
std::vector getDirNames() const; /// the filename (wo ext; "foo.bar" => "foo") std::string getFileName() const { return m_file_name; }; /// the filename (wo ext; "foo.bar" => "foo") std::string getFileNameWoExt() const { return m_file_name; }; /// just the extention ("foo.bar" => "bar") std::string getFileExt() const { return m_file_ext; }; /// the file and extension. (like unix basename) std::string getBaseName() const; /// the internal group name (like for file5) std::string getInternalGroupName() const { return m_internal_group_name; }; /// the internal table name (like for file5) std::string getInternalTableName() const { return m_internal_table_name; }; /// push a dirname on the end of the dir path. void pushDir(const std::string& dir); /// could we pop another name off the directory path? bool canPopDir() const; /// pops a dirname off the end of the path. ("foo/bar/baz" => "foo/bar" returns "baz") std::string popDir(); /// map the extension to a filefmt. FsPath::FileFmt_t fmtFromExt() const; FsPath::FileFmt_t getFileFmt() const; void setFileFmt(FsPath::FileFmt_t fmt) { m_filefmt=fmt; }; /// a printable string which could be passed to our "setPath()" /// but it cant be opened in the filesystem. /// it has the internal group and table names. std::string asString() const; /// format the path for unix. (Which is our internal format.) std::string asUnixPath() const; /// format the path and return a tmp "const char*". const char* asUnixPathCstr() const; /// what should pass to the OS to open with. (a no-op on unix) std::string asUncPath() const; /// selects asWindowsPath or asUnixPath. const char* osUncPathCstr() const; /// is this an absolute path? "/foo/..."? bool isAbsolute() const; /// Does this name a directory? (no file name or ext) bool isDirName() const; /// Does this name a file? (file name or ext is set.) bool isFileName() const; /// does this exist in the filesystem? 
(file or dir) bool exists() const; bool dirExists() const; bool fileExists() const; /// is this a directory we can write to? /// note that this is special on windows as we actually test it. bool isWriteableDir() const; bool isWriteableDir(AptErr_t& rv) const; /// Could we read from this path? (file or dir) bool isReadable() const; /// Could we write to this path? (file or dir) bool isWriteable() const; /// the size of the file in bytes. int64_t fileSize(); /// true if is a file and is in HDF5 format. /// bool osIsHdf5() const; /// tries to make one directory (just the last part of dirpath) AptErr_t mkdir(bool errIfExists=true); /// tries to make the entire path. (like mkdir -p) AptErr_t mkdirPath(bool errIfExists=true); /// /// Makes AptErr_t ensureWriteableDirPath(); /// remove the directory. (dir must be empty.) AptErr_t rmdir(bool errIfNotExists=true); /// remove the path of dirs (all dirs must be empty.) AptErr_t rmdirPath(bool errIfNotExists=true); /// returns a list of names in the dir. AptErr_t listDir(std::vector& names); /// removes a file. AptErr_t rm(bool errIfNotExists=false); /// changes the mode (dir or file) AptErr_t chmod(int mode,bool errIfNotExists=true); /// creates the file if it does not exist. AptErr_t touch(); /// rename the file to the new name // AptErr_t renameTo(const std::string& to_name); // AptErr_t renameTo(const FsPath& to_path); /// copy the file to the path // AptErr_t copyTo(const std::string& to_name); // AptErr_t copyTo(const FsPath& to_path); /// the free space measured in bytes. int64_t getFreeDiskSpace() const; /// are the paths on the same volume? bool isSameVolume(const std::string& dirname); /// are the paths on the same volume? bool isSameVolume(const FsPath& fspath); /// are the paths on the same volume? bool isSameVolume(const FsPath& fspath,AptErr_t& rv); ////////// private: /// true=throw and error when m_err_num != APT_OK bool m_err_doAbort; /// the last error. 
AptErr_t m_err_num; /// the last error message std::string m_err_msg; /// The path as it was given to us. /// only used for debugging. std::string m_original_path; /// the windows drive "C:" std::string m_dir_drive; /// the directory path "/var/tmp" std::string m_dir_name; /// the filename part of the name: "myfile" std::string m_file_name; /// the extension part of the name: ".cpp", ".h" std::string m_file_ext; /// tmp string used for buffer the return of a "const char*" mutable std::string m_tmp_cstr; /// the group name inside the file. std::string m_internal_group_name; /// the tablename/dataset inside the file. std::string m_internal_table_name; // FsPath::FileFmt_t m_filefmt; // how we map the extensions to FileFmts. static std::map m_ext2fmt_map; // for the inital setup. static int m_ext2fmt_map_isloaded; }; #endif // _UTIL_FSPATH_H_ affxparser/src/fusion/util/Guid.cpp0000644000175200017520000001240614516003651020404 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _MSC_VER #define _CRT_SECURE_NO_WARNINGS #endif #ifdef _WIN32 #include #endif // #include "util/Guid.h" // #include "portability/affy-system-api.h" #include "util/Convert.h" #include "util/Err.h" #include "util/chksum.h" // #include #include #include #include #include #include // ////////// using namespace affxutil; ////////// #ifdef _MSC_VER #define _CRT_SECURE_NO_WARNINGS #endif // Since we are using "*" we want to avoid zero when mixing in data to the seed... #define AFFY_SEEDVAL(VAL) do { \ unsigned int val=(unsigned int)(VAL); \ if (val!=0) { \ seed=seed*val; \ } \ } while (0) /// Have we seeded srand already? static int affy_seed_srand_done=0; void affxutil::affy_seed_srand() { // the pid is included for the case where two processes // start up in the same second. unsigned int seed; // start with a number... seed=314159; // add more sources of randomness here. AFFY_SEEDVAL(time(NULL)); AFFY_SEEDVAL(getpid()); // gethostid is zero on darwin, but that is ok. AFFY_SEEDVAL(gethostid()); // Check the env var for a seed (for debugging) #ifndef WIN32 char* debug_seed_str=getenv(AFFY_DEBUG_SRAND_SEED_ENVVAR); if (debug_seed_str!=NULL) { char* endptr; unsigned int debug_seed=strtol(debug_seed_str,&endptr,0); // did we parse the entire string? if ((endptr==debug_seed_str)||(*endptr!=0)) { // nope! Err::errAbort("Unable to parse env var: '"+ToStr(AFFY_DEBUG_SRAND_SEED_ENVVAR)+"'='"+ToStr(debug_seed_str)+"'"); } /// @todo: log the seed and tell the user we are using a debugging seed. // set it... seed=debug_seed; } #endif // tell srand our seed value... srand(seed); // remember that the seeding has been done... 
affy_seed_srand_done=1; } /// @brief Ensure that the seed as been set once. void affxutil::ensure_srand_seeded() { if (affy_seed_srand_done==0) { affxutil::affy_seed_srand(); } } Guid::Guid() { #ifdef WIN32 // Initialize Winsock library WSADATA wsaData; WORD wVersionRequested = MAKEWORD(1, 1); int nResult = WSAStartup(wVersionRequested, &wsaData); #endif affxutil::ensure_srand_seeded(); } Guid::~Guid() { // Clean up the socket library. #ifdef WIN32 WSACleanup(); #endif } // Check which format to use... #ifdef AFFY_GUID_FORMAT_RFC4122 // See the following references: // http://www.ietf.org/rfc/rfc4122.txt // http://en.wikipedia.org/wiki/Globally_Unique_Identifier GuidType Guid::GenerateNewGuid() { affxutil::ensure_srand_seeded(); // generate the random bits of the guid... int data1=rand(); int data2=(rand()%0xFFFF); int data3=rand(); // mark this guid as version 4 (a pseudo-random guid) // the version is in the high byte. data3=data3&0x0FFF; data3=data3|0x4000; // data4 is 8 bytes. We generate it in three groups. int data41=(rand()&0xFFFF); // 2B int data42=(rand()&0xFFFFFF); // 3B int data43=(rand()&0xFFFFFF); // 3B // f81d4fae-7dec-11d0-a765-00a0c91e6bf6 char buf[100]; snprintf(buf,sizeof(buf),"%08x-%04x-%04x-%04x-%06x%06x", data1,data2,data3,data41,data42,data43); // std::stringify it. GuidType guid=buf; return guid; } #else /* * Create a new guid based on the host name, current time and random numbers. * A checksum of the string values are taken so as to remove any user * interpretive information. This should strictly just be an identifier. */ GuidType Guid::GenerateNewGuid() { const int GUID_LENGTH = 54; char guid[GUID_LENGTH+1]; const int MAX_HOST_NAME = 64; char hostname[MAX_HOST_NAME]; time_t currentTime = time(NULL); affxutil::ensure_srand_seeded(); gethostname(hostname, MAX_HOST_NAME); /// @todo This is bad -- consecutive calles to "rand()" are not random. /// The values should be hashed with something stronger than ones complement. 
/// @todo printf will pad with "0" if the format string has a leading "0". /// The format string should be: "%010d-%010d-%010d-%010d-%010d", /// And is this what a uuid is supposed to look like? /// 'uuidgen' prints a value like: "a5b4b2ca-006d-4b89-b7ca-14eedb1e02b9" snprintf(guid,sizeof(guid), "%10d-%10d-%10d-%10d-%10d", CheckSum::OnesComplementCheckSum(hostname, strlen(hostname)/2), (int) currentTime, rand(), rand(), rand()); guid[GUID_LENGTH] = 0; // fill with zeros for (int i=0; i #include #include /// /// RFC4122 v4 Guid => 09fdee5a-b19e-4101-0668-0840519ad701 /// Classic APT Guid => 0000007361-1232759575-1336980168-0774759233-0407464966 /// /// The format of the guids affxutil::GenerateNewGuid() will make. /// Comment it out for the old format. #define AFFY_GUID_FORMAT_RFC4122 1 /// The env var to read the seed to srand from on unix. /// This is handy for debugging. #define AFFY_DEBUG_SRAND_SEED_ENVVAR "APT_DEBUG_SRAND_SEED" namespace affxutil { /// @brief Ensure that srand has been seeded. /// Safe to call multiple times. void ensure_srand_seeded(); /// @brief Seed srand with data from the system env /// Or from 'APT_DEBUG_SRAND_SEED' on unix. /// It will always set the srand seed when called. void affy_seed_srand(); /*! The GUID type */ typedef std::string GuidType; /*! An STL list of guid types */ typedef std::list GuidTypeList; /*! This class provides functions for creating globally unique identifiers. */ /// @todo: Why do we need GuidType and this class? class APTLIB_API Guid { public: /*! Constructor */ Guid(); /*! Destructor */ ~Guid(); /*! Generates a new GUID. * * @return The new GUID. */ static GuidType GenerateNewGuid(); }; }; #endif // _UTIL_GUID_H_ affxparser/src/fusion/util/LineFile.cpp0000644000175200017520000000664514516003651021213 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file LineFile.cpp * @author Mybrid Spalding * @date Wed Feb 2 10:04:48 PST 2011 * @brief Test class for reading files by line that is platform safe. */ // #include "util/LineFile.h" // #include "util/Err.h" #include "util/Fs.h" #include "util/Util.h" // /** * What type of line endings do we see in this input stream? * @param input - stream to look for '\r','\n', or '\r\n' in. * @return type of line endings we expect in the future. */ static enum LineFile::FileLineEnding _determineLineEndingType(std::ifstream & fileStream) { enum LineFile::FileLineEnding ending = LineFile::UNKNOWN; while(fileStream.good()) { char c1 = fileStream.get(); // '\r' alone indicates mac, '\r\n' indicates dos if(c1 == '\r') { if(fileStream.good() && fileStream.get() == '\n') { ending = LineFile::DOS; } else { ending = LineFile::MAC; } break; } // is it a unix file? else if(c1 == '\n') { ending = LineFile::UNIX; break; } } // reset stream back to beginning of file. fileStream.seekg(0); return ending; } /** * Constructor. */ LineFile::LineFile(): m_endType(UNKNOWN) {} /** * Destructor. */ LineFile::~LineFile() { if(m_fileStream.is_open()) { m_fileStream.close(); } }; /** * Open file fileName or die trying. 
* @param char * fileName - full path of fileName to be opened. */ std::ifstream & LineFile::open(const std::string& fileName, bool abortOnError) { if(m_fileStream.is_open()) { m_fileStream.close(); } m_fileName = fileName; Fs::aptOpen(m_fileStream, m_fileName, std::ios_base::in); if(!m_fileStream.is_open()) { if(abortOnError) { Err::errAbort("Can't open file " + FS_QUOTE_PATH(fileName) + " to read."); } } else { m_endType = _determineLineEndingType(m_fileStream); } return m_fileStream; } /** * Close file streams. */ void LineFile::close() { if(m_fileStream.is_open()) { m_fileStream.close(); } } /** * LineFile::getline * * Read the next line if the stream is open. * When eof is reached, close the file. * @return - * at EOF. */ bool LineFile::getline(std::string & line, bool trim) { line.clear(); if(!m_fileStream.is_open() || m_fileStream.fail() || m_fileStream.eof()) { return false; } if(m_endType == MAC) { std::getline(m_fileStream, line, '\r'); } else { std::getline(m_fileStream, line, '\n'); // if ms-dos chop off the '\r' if(m_endType == DOS && line.length() > 0) { line.erase(line.end() - 1); } } if(trim && (line.length() > 0)) { Util::trimString(line); } return true; } affxparser/src/fusion/util/LineFile.h0000644000175200017520000000637614516003651020661 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file LineFile.h * @author Mybrid Spalding * @date Mon May 23 11:27:16 2005 * * @brief DO NOT USE. Except in cases of test code. Platform safe file class to read lines. */ #ifndef _LINE_FILE_H_ #define _LINE_FILE_H_ // #include #include // /** * LineFile * @brief Platform safe class for reading lines. * * For Test Code ONLY. * Chances are you should be using file/TsvFile/TsvFile * and not this object. This file is intended for test code in util/ * being used by file/TsvFile/TsvFile and other * code not using TsvFile. */ class LineFile { public: /** Line endings are platform specific, this encodes different types we know of. */ enum FileLineEnding { UNIX = 0, // '\n' DOS = 1, // '\r\n' MAC = 2, // '\r' UNKNOWN = 3 // ??? }; /** * Constructor. */ LineFile(); /** * @brief Destructor closes stream. */ virtual ~LineFile(); /** * @brief Platform safe open for read, can be used multiple times * with different file names. * @return ifstream in question. * */ std::ifstream & open(const std::string & fileName, bool abortOnError = false); /** * @brief close the ifstream if needed. * @return void */ void close(); /** * @brief Platform safe reading of a line, stripping the eol. * File is automatically closed when eof is reached. * @param line - returned if possible, cleared otherwise. * @param trim - false by default, true strips trailing spaces. * @return true if line is returned. * line will be cleared when false is returned. . */ bool getline(std::string & line, bool trim = false); /** * @brief What type of file is this one? * @return Type of file that has been determined. 
*/ enum FileLineEnding getFileType() { return m_endType; } /** * @brief What is the name of the file we're reading from? * @return name of file being read. */ std::string getFileName() { return m_fileName; } /** * @brief - get the input file stream. * @return ifstream reference. */ std::ifstream & getStream() { return m_fileStream; } bool is_open() { return m_fileStream.is_open(); } bool fail() { return m_fileStream.fail(); } bool eof() { return m_fileStream.eof() ; } private: std::string m_fileName; std::ifstream m_fileStream; enum FileLineEnding m_endType; }; #endif /* _LINE_FILE_ */ affxparser/src/fusion/util/LogStream.cpp0000644000175200017520000002421214516003651021407 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file LogStream.cpp * @author Chuck Sugnet * @date Tue Mar 21 10:35:22 PST 2006 * * @brief Output verbose messages to log file as we go along. 
*/ #include "util/LogStream.h" // #include "util/AffxTime.h" #include "util/Util.h" // #ifdef __APPLE__ #include #include #endif // #ifdef __linux__ #include "stdlib.h" #include "stdio.h" #include "string.h" #endif // #ifdef WIN32 #include "windows.h" #include "psapi.h" #endif void printHeader(std::ostream *out, bool profile) { if(out != NULL && out->good()) { (*out) << "Timestamp\t"; if(profile) { (*out) << "Total RAM\t"; (*out) << "Free RAM\t"; (*out) << "Swap Available\t"; (*out) << "Memory Available\t"; (*out) << "Memory Available at Start\t"; (*out) << "Memory Available at Block Start\t"; (*out) << "Memory Used Since Start\t"; (*out) << "Memory Used Since Block Start\t"; #ifdef __APPLE__ (*out) << "Memory Resident Size\t"; (*out) << "Memory Virtual Size\t"; #endif #ifdef __linux__ (*out) << "Memory Resident Size\t"; (*out) << "Memory Virtual Size\t"; #endif #ifdef WIN32 (*out) << "Memory Resident Size\t"; (*out) << "Memory Virtual Size\t"; #endif } (*out) << "Message\n"; } } /** Constructor. */ LogStream::LogStream(int verbosity, std::ostream *out, bool profile) : m_Verbosity(verbosity), m_Out(out), m_Profile(profile) { printHeader(m_Out, m_Profile); } LogStream::~LogStream() { m_ProgressTimeStart.clear(); m_ProgressTotal.clear(); } void LogStream::setStream(std::ostream *out) { m_Out = out; printHeader(m_Out, m_Profile); } void LogStream::unsetStream(std::ostream *out) { m_Out = NULL; } /** * A message to be processed by the stream. * * @param verbosity - What level of verbosity is associated with this message, higher number == more verbosity. * @param log - Message that is to be processed. * @param delimiter - Should a delimiter be emitted as well? 
*/ void LogStream::message(int verbosity, const std::string &log, bool delimiter) { if(verbosity <= m_Verbosity && m_Out != NULL && m_Out->good()) { std::string timeStr = AffxTime::getCurrentTime().getTimeStampString(); (*m_Out) << timeStr << "\t"; if(m_Profile) (*m_Out) << profileString(); (*m_Out) << log; // For the log file we always dump a newline (*m_Out) << std::endl; m_Out->flush(); } } /** * Begin a progress reporting. After this initial call the * progressStep call will be called N times where N = the parameter * total passed in below. * * @param verbosity - What level of verbosity is the progress * associated with. Higher levels of verbosity produce more * messages. * @param msg - String message associated with the beginning of this task. * @param total - Expected number of times that the progressStep() * will be called after this. */ void LogStream::progressBegin(int verbosity, const std::string &msg, int total) { m_ProgressTimeStart.push_back(time(NULL)); m_ProgressTotal.push_back(total); if(verbosity <= m_Verbosity && m_Out != NULL && m_Out->good()) { // flush any C io before doing our IO fflush(NULL); std::string timeStr = AffxTime::getCurrentTime().getTimeStampString(); (*m_Out) << timeStr << "\t"; if(m_Profile) (*m_Out) << profileString(); (*m_Out) << msg; // For the log file we always dump a newline (*m_Out) << std::endl; m_Out->flush(); } } /** * This function is called when one unit of work has been done. In * general it is expected that the units of work should be roughly * equal in size. * * @param verbosity - At what verbosity levell should this step be * displayed. 
*/ void LogStream::progressStep(int verbosity) { if(verbosity <= m_Verbosity && m_Out != NULL && m_Out->good()) { // flush any C io before doing our IO fflush(NULL); std::string timeStr = AffxTime::getCurrentTime().getTimeStampString(); (*m_Out) << timeStr << "\t"; if(m_Profile) (*m_Out) << profileString(); (*m_Out) << "Progress Step"; // For the log file we always dump a newline (*m_Out) << std::endl; m_Out->flush(); } } /** * Signals the end of progress report. * * @param verbosity - Level of verbosity associated with this progress report. * @param msg - Closing message from calling function. */ void LogStream::progressEnd(int verbosity, const std::string &msg) { time_t timeStart = m_ProgressTimeStart[m_ProgressTimeStart.size()-1]; m_ProgressTimeStart.pop_back(); m_ProgressTotal.pop_back(); if(verbosity <= m_Verbosity && m_Out != NULL && m_Out->good()) { time_t timeEnd = time(NULL); double dRunTime = (timeEnd - timeStart); // time span in seconds. std::string str; if (dRunTime < 60) { str = msg + "\t"; str += ::getDouble(dRunTime, 2, true); str += " second run time"; } else if (dRunTime < (60 * 60)) { str = msg + "\t"; str += ::getDouble((dRunTime) / 60, 2, true); str += " minute run time"; } else { str = msg + "\t"; str += ::getDouble((dRunTime) / (60 * 60), 2, true); str += " hour run time"; } // flush any C io before doing our IO fflush(NULL); std::string timeStr = AffxTime::getCurrentTime().getTimeStampString(); (*m_Out) << timeStr << "\t"; if(m_Profile) (*m_Out) << profileString(); (*m_Out) << str ; // For the log file we always dump a newline (*m_Out) << std::endl; m_Out->flush(); } } /** * What level of verbosity is requested. We force lots of output here. * * @param verbosity - Level below which progress messages are printed. 
*/ void LogStream::setBaseVerbosity(int verbosity) { // do notthing } std::string toMB(int64_t mem) { return ToStr(mem/MEGABYTE); } #ifdef __APPLE__ void _getProcessMemOSX(uint64_t &rss, uint64_t &vs) { struct task_basic_info t_info; mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; if (KERN_SUCCESS != task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count)) { rss = 0; vs = 0; } else { rss = t_info.resident_size; vs = t_info.virtual_size; /* Looks like the os-x docs are wrong. The return values are in bytes, not pages vm_size_t page_size; if(KERN_SUCCESS != host_page_size(mach_task_self(), &page_size)) { rss = 0; vs = 0; } else { rss *= page_size; vs *= page_size; } */ } } #endif #ifdef __linux__ int parseProcLine(char* line){ int i = strlen(line); while (*line < '0' || *line > '9') line++; line[i-3] = '\0'; i = atoi(line); return i; } void _getProcessMemLinux(uint64_t &rss, uint64_t &vs) { FILE* file = fopen("/proc/self/status", "r"); char line[128]; rss = 0; vs = 0; // VmPeak: 3764 kB // VmSize: 3764 kB // VmLck: 0 kB // VmHWM: 376 kB // VmRSS: 376 kB // VmData: 164 kB // VmStk: 88 kB // VmExe: 20 kB // VmLib: 1408 kB // VmPTE: 32 kB while (fgets(line, 128, file) != NULL){ if (strncmp(line, "VmSize:", 7) == 0) vs = parseProcLine(line); if (strncmp(line, "VmRSS:", 6) == 0) rss = parseProcLine(line); } fclose(file); vs *= 1024; rss *= 1024; } #endif #ifdef WIN32 void _getProcessMemWin32(uint64_t &rss, uint64_t &vs) { PROCESS_MEMORY_COUNTERS_EX pmc; if(GetProcessMemoryInfo(GetCurrentProcess(), (PPROCESS_MEMORY_COUNTERS)&pmc, sizeof(pmc))==0) { vs = 0; rss = 0; } else { vs = pmc.WorkingSetSize; rss = pmc.PrivateUsage; } } #endif std::string LogStream::profileString() { uint64_t freeRam = 0, totalRam = 0, swapAvail = 0, memAvail = 0; Util::memInfo(freeRam, totalRam, swapAvail, memAvail, false); uint64_t memFreeAtStart = Util::getMemFreeAtStart(); uint64_t memFreeAtBlock = Util::getMemFreeAtBlock(); int64_t memUsedSinceStart = 
memFreeAtStart; memUsedSinceStart -= memAvail; int64_t memUsedSinceBlock = memFreeAtBlock; memUsedSinceBlock -= memAvail; std::string profile = toMB(totalRam) + "\t" + toMB(freeRam) + "\t" + toMB(swapAvail) + "\t" + toMB(memAvail) + "\t" + toMB(memFreeAtStart) + "\t" + toMB(memFreeAtBlock) + "\t" + toMB(memUsedSinceStart) + "\t" + toMB(memUsedSinceBlock) + "\t"; #ifdef __APPLE__ uint64_t rss=0, vs=0; _getProcessMemOSX(rss, vs); profile += toMB(rss) + "\t" + toMB(vs) + "\t"; #endif #ifdef __linux__ uint64_t rss=0, vs=0; _getProcessMemLinux(rss, vs); profile += toMB(rss) + "\t" + toMB(vs) + "\t"; #endif #ifdef WIN32 uint64_t rss=0, vs=0; _getProcessMemWin32(rss, vs); profile += toMB(rss) + "\t" + toMB(vs) + "\t"; #endif return profile; } affxparser/src/fusion/util/LogStream.h0000644000175200017520000000453514516003651021062 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file LogStream.h * @author Chuck Sugnet * @date Tue Mar 21 10:35:22 PST 2006 * * @brief Output verbose messages to log file as we go along. 
*/ #ifndef LOGSTREAM_H #define LOGSTREAM_H // #include "portability/apt-win-dll.h" #include "util/AffxTime.h" #include "util/MsgHandler.h" #include "util/Util.h" // #include #include #include /** * @brief Log messages with timestamps. */ class APTLIB_API LogStream : public MsgHandler, public ProgressHandler { public: /** Constructor. */ LogStream(int verbosity=3, std::ostream *out=NULL, bool profile=true); ~LogStream(); void setStream(std::ostream *out); void unsetStream(std::ostream *out); virtual void message(int verbosity, const std::string &log, bool delimiter = true); virtual void progressBegin(int verbosity, const std::string &msg, int total); virtual void progressStep(int verbosity); virtual void progressEnd(int verbosity, const std::string &msg); void setBaseVerbosity(int verbosity); protected: std::string profileString(); protected: int m_Verbosity; ///< What level of messages is wanted, larger num == more msgs std::ostream *m_Out; ///< Stream to print messages to bool m_Profile; ///< Should we report profile info std::vector m_ProgressTimeStart; ///< Start times for progress segments std::vector m_ProgressTotal; ///< Number of times step is expected to be called }; #endif /* LOGSTREAM_H */ affxparser/src/fusion/util/MatrixCheck.h0000644000175200017520000001340514516003651021363 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file MatrixCheck.h * @author Chuck Sugnet * @date Tue Apr 25 16:12:22 2006 * * @brief Class for doing a check of two matrix files after regression run. */ #ifndef MATRIXCHECK_H #define MATRIXCHECK_H // #include "util/Fs.h" #include "util/RegressionCheck.h" #include "util/Util.h" #include "util/Verbose.h" // #include #include #include #include // /** * Class for testing that two files are the same +/- some * epsilon. Currently only supports tab separated matrixes of * numerical data. */ class MatrixCheck : public RegressionCheck { public: /** * Constructor. * * @param generated - File created by application being tested. * @param gold - File believed to be "truth" * @param eps - Maximum absolute difference from truth acceptable. * @param rowSkip - Number of rows to skip before comparing. * @param colSkip - Number of columns to skip before comparing. * @param matchNames - Should we try to match the names using * first column? Useful when the files aren't * necessarily in same order. * @param allowedMisMatch - How many can we get wrong before failing? 
* @param frac - Maximum fractional difference from truth acceptable */ MatrixCheck( const std::string &generated, const std::string &gold, double eps, int rowSkip, int colSkip, bool matchNames, unsigned int allowedMisMatch, double frac = 0.0 ) : m_Generated(generated), m_Gold(gold), m_Epsilon(eps), m_Fraction(frac), m_RowSkip(rowSkip), m_ColSkip(colSkip), m_MatchNames(matchNames), m_AllowedMisMatch(allowedMisMatch), m_PrintMismatch(false), m_PrintMismatchMax(-1) { m_Name = Fs::basename(generated); } /** * Utility function to enable/disable reporting of individual differences * @param print - mismatches printed if true, not printed if false. * @return - void */ void setPrintMismatch(bool print) { m_PrintMismatch = print; } /** * Utility function to set the max number of mismatches reported * @param max - maximum number of mismatches to report (set to -1 for no limit default behavior) * @return - void */ void setPrintMismatchMax(int max) { m_PrintMismatchMax = max; } /** * Check to make sure that two files are the same +/- some epsilon. * @param msg - Fills in an error message if test fails, empty string otherwise. * @return - Returns true if files are close enough, false otherwise. */ bool check(std::string &msg) { std::string generated(m_Generated), gold(m_Gold); msg = ""; bool success = true; // Fix pathnames to work for this platform. generated = Fs::convertToUncPath(generated); gold = Fs::convertToUncPath(gold); /* Santiy checks. */ if (!Fs::isReadable(generated)) { msg += "Can't open file: "+FS_QUOTE_PATH(generated)+" to read."; return false; } if (!Fs::isReadable(gold.c_str())) { msg += "Can't open file: "+FS_QUOTE_PATH(gold)+" to read."; return false; } if(!(m_ColSkip >= 0 && m_RowSkip >= 0 && m_Epsilon >= 0)) { msg += "invalid ColSkip, RowSkip, and/or Epsilon"; return false; } /* Count up differences. 
*/ // push handler to throw exception Err::setThrowStatus(true); int diffCount; try { diffCount = Util::matrixDifferences(generated.c_str(), gold.c_str(), m_ColSkip, m_RowSkip, m_Epsilon, m_PrintMismatch, m_MatchNames, m_Fraction, m_PrintMismatchMax ); } catch(Except &e) { msg += "Caught exception: " + ToStr(e.what()); return false; } Err::setThrowStatus(true); if(diffCount > (int)m_AllowedMisMatch) { success = false; msg += "File: " + generated + " vs " + gold + ": "; msg += ToStr("Expecting no more than ") + ToStr(m_AllowedMisMatch) + ToStr(" found: ") + ToStr(diffCount); } return success; } std::string m_Generated; ///< File created by application being tested. std::string m_Gold; ///< File believed to be "truth" double m_Epsilon; ///< Maximum abosolute difference from truth acceptable. double m_Fraction; ///< Maximum fractional difference from truth acceptable int m_RowSkip; ///< Number of rows to skip before comparing. int m_ColSkip; ///< Number of columns to skip before comparing. bool m_MatchNames; ///< Should we try to match the names using /// first column? Useful when the files aren't /// necessarily in same order. unsigned int m_AllowedMisMatch; ///< How many can we get wrong before failing? bool m_PrintMismatch; ///< Mismatches printed if true, not printed if false. int m_PrintMismatchMax; ///< Maximum number of mismatches to print (default -1 for no limit) }; #endif /* MATRIXCHECK_H */ affxparser/src/fusion/util/MessageStream.h0000644000175200017520000000370614516003651021724 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /* \file MessageStream.h Provides a base class for streaming messages from a component to its calling function. */ #ifndef _MessageStream_HEADER_ #define _MessageStream_HEADER_ #include #include // /*! Provides a base class definition for streaming back messages to the parent process. */ class MessageStream { protected: /*! The level of messages to display. Larger number equals more messages. */ int verbosity; public: /*! Constructor */ MessageStream() { verbosity = 0; } virtual ~MessageStream() { } /*! Set the level of verbosity desired. 0 == no messages * 1 == normal messages, 2,3,4, etc. == more verbose. * @param level - level of verbosity desired. */ void SetLevel(int level) { verbosity = level; } /*! Get the level of verbosity desired. 0 == no messages * 1 == normal messages, 2,3,4, etc. == more verbose. * @return current level of verbosity */ int GetLevel() const { return verbosity;} /*! Send a message. * @param level - level of verbosity desired. * @param msg The message to send. */ virtual void Write(int level, const std::string &msg) = 0; }; #endif affxparser/src/fusion/util/MixedFileCheck.h0000644000175200017520000002354314516003651021771 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2006 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file MixedFileCheck.h * @brief Class for testing whether two text files, containing * both text and numeric data, are equal, within tolerances * for numeric data. */ #ifndef MIXEDFILECHECK_H #define MIXEDFILECHECK_H // #include "util/Convert.h" #include "util/Fs.h" #include "util/RegressionCheck.h" // #include #include #include #include /** * Class for testing whether two files, containing both text * and numeric data, are equal, within tolerances for numeric * data, ignoring line endings. */ class MixedFileCheck: public RegressionCheck { public: /** * Constructor. * * @param generatedFile File generated by application. * @param goldFile Comparison file, assumed to be correct. * @param eps Maximum accepted absolute difference in numeric values. * i.e. if |generated-gold| >= eps then there is a difference. * @param skipDataLines Number of data lines to skip (after header lines auto-skipped). * @param allowedMismatch Maximum number of non-matching numeric values accepted. * @param frac Maximum accepted fractional difference in numeric values (not used by default). * i.e. if |generated-gold| >= frac*max(|generated|,|gold|) then there is a difference. 
*/ MixedFileCheck (const std::string &generatedFile, const std::string &goldFile, const double eps, const unsigned int skipDataLines, const unsigned int allowedMismatch, const double frac = 0.0) : m_GeneratedFile (generatedFile), m_GoldFile (goldFile), m_Eps (eps), m_SkipDataLines (skipDataLines), m_AllowedMismatch (allowedMismatch), m_MaxErrorsReport (-1), m_Frac (frac) { m_Name=Fs::basename(generatedFile); } /** * Utility function to set the max number of errors reported * @param max - maximum number of errors to report (set to -1 for no limit default behavior) * @return - void */ void setMaxError(int max) { m_MaxErrorsReport = max; } /** * Check that the two files are the same, within tolerances. * * @param errorMsg Error message generated if the test fails. * @return bool Return true if files pass tests, else false. */ bool check (std::string& errorMsg) { // Convert File Names m_GoldFile = Fs::convertToUncPath(m_GoldFile); m_GeneratedFile = Fs::convertToUncPath(m_GeneratedFile); // Open files. 
Verbose::out(1,"Reading in file: " + m_GoldFile); Verbose::out(1,"Reading in file: " + m_GeneratedFile); std::ifstream generatedStream; Fs::aptOpen(generatedStream, m_GeneratedFile); if (!generatedStream.is_open() && !generatedStream.good() ) { errorMsg = "Unable to open generated file " + m_GeneratedFile; Verbose::out(1,errorMsg); return false; } std::ifstream goldStream; Fs::aptOpen(goldStream, m_GoldFile); if (!goldStream.is_open() && !goldStream.good() ) { errorMsg = "Unable to open gold file " + m_GoldFile; Verbose::out(1,errorMsg); return false; } unsigned int lineNumberGold = 0; // absolute line number in gold file (used for reporting differences) unsigned int lineNumberGen = 0; // absolute line number in generated file (used for reporting differences) unsigned int lineCount = 0; // number of data lines loaded (excludes skipped header lines) unsigned int mismatchCount = 0; const char* lineEndings = "\r\n"; std::string goldLine, generatedLine; Verbose::out(1,"Looking for differences."); while (! goldStream.eof() && ! goldStream.fail()) { // Skip header lines -- for now // really need TsvFileCheck that replaces // MixedFileCheck and MatrixFileCheck do { lineNumberGold++; getline (goldStream, goldLine); } while(goldLine[0] == '#'); do { lineNumberGen++; getline (generatedStream, generatedLine); } while(generatedLine[0] == '#'); if (generatedStream.eof() && ! goldStream.eof()) { errorMsg = "The generated file, " + m_GeneratedFile + ", has fewer lines than the gold file, " + m_GoldFile; Verbose::out(1,errorMsg); return false; } // Skip header lines which need not be equal. if (++lineCount > m_SkipDataLines) { // Avoid line ending hassles. goldLine = goldLine.erase (goldLine.find_last_not_of (lineEndings) + 1); generatedLine = generatedLine.erase (generatedLine.find_last_not_of (lineEndings) + 1); // Skipping white space, convert each line to a series of strings. 
unsigned int columnNumber = 0; // column number for values compared (used for reporting differences) std::istringstream goldStream (goldLine); std::istringstream generatedStream (generatedLine); std::string goldString, generatedString; while (goldStream >> goldString) { columnNumber++; // Require the same number of whitespace delimited fields. if (! (generatedStream >> generatedString)) { Verbose::out(1,"Unequal amount of whitespace delimited fields in files"); errorMsg = lineErrorMsg (generatedLine, goldLine); Verbose::out(1,errorMsg); return false; } double goldDouble, generatedDouble; bool goldSuccess, generatedSuccess; goldDouble = Convert::toDoubleCheck (goldString.c_str(), &goldSuccess); generatedDouble = Convert::toDoubleCheck (generatedString.c_str(), &generatedSuccess); // If both fields are numeric, check for equality within the prescribed tolerance. if (goldSuccess && generatedSuccess) { // allowed absolute difference from fractional tolerance (zero by default) double eps2 = m_Frac*Max( fabs(goldDouble), fabs(generatedDouble) ); // absolute difference is acceptable if it satisfies either (least restrictive) tolerance if (fabs (goldDouble - generatedDouble) > Max(m_Eps,eps2)) { ++mismatchCount; // report differences with ZERO-based line and column numbers if( (int)mismatchCount<=m_MaxErrorsReport || m_MaxErrorsReport<0 ) Verbose::out(1,"Numbers differ at gold line " + ToStr(lineNumberGold-1) + " generated line " + ToStr(lineNumberGen-1) + " column " + ToStr(columnNumber-1) + ": " + goldString + " and " + generatedString); if( (int)mismatchCount==m_MaxErrorsReport+1 && m_MaxErrorsReport>0 ) Verbose::out(1,"Number of differences exceeds maximum number (" + ToStr(m_MaxErrorsReport) + ") to report."); } continue; } // If neither field is numeric, require them to be identical. if ((! goldSuccess) && (! 
generatedSuccess) && (goldString == generatedString)) continue; else Verbose::out(1,"Strings differ: " + goldString + " and " + generatedString); // Quit if there is a type mismatch or both fields are non-numeric, not identical. errorMsg = lineErrorMsg (generatedLine, goldLine); Verbose::out(1,errorMsg); return false; } // end while (goldStream >> goldString) // Require that the two lines have the same number of fields. if (generatedStream >> generatedString) { errorMsg = lineErrorMsg (generatedLine, goldLine); Verbose::out(1,errorMsg); return false; } } // end if (++lineCount > m_SkipDataLines) } // end while (! goldStream.eof() && ! goldStream.fail()) // The two files should reach eof at the same time. if (! generatedStream.eof()) { errorMsg = "The generated file, " + m_GeneratedFile + ", has more lines than the gold file, " + m_GoldFile; Verbose::out(1,errorMsg); return false; } // Require that the number of numeric differences above tolerance is below // the defined threshold. if (mismatchCount > m_AllowedMismatch) { errorMsg = "There were " + ToStr (mismatchCount) + " instances where " + "numeric fields differed by more than the accepted tolerance: only " + ToStr (m_AllowedMismatch) + " are allowed"; Verbose::out(1,errorMsg); return false; } Verbose::out(1,"Same."); return true; } private: /** * Generate a generic error message for a line mismatch. * * @param generatedLine Line generated by the application. * @param goldLine Line considered to be correct. * @return Error message. */ const std::string lineErrorMsg (const std::string& generatedLine, const std::string& goldLine) { const std::string msg = "Mismatch reading generated file " + m_GeneratedFile + ":\ngold line: '" + goldLine + "'\ngenerated line: '" + generatedLine + "'"; return msg; } /// Name of file generated by application being tested. std::string m_GeneratedFile; /// Name of file assumed to be correct. std::string m_GoldFile; /// Maximum accepted absolute difference in numeric values. 
const double m_Eps; /// Number of data lines to skip (header lines are already auto-skipped). const unsigned int m_SkipDataLines; /// Maximum number of non-matching numeric values accepted. const unsigned int m_AllowedMismatch; /// The maximum number of errors to report int m_MaxErrorsReport; /// Maximum fractional difference considered equivalent. const double m_Frac; }; #endif /* MIXEDFILECHECK_H */ affxparser/src/fusion/util/MsgHandler.h0000644000175200017520000000374614516003651021214 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file MsgHandler.h * @author Chuck Sugnet * @date Mon Jan 30 16:46:01 2006 * * @brief File for handling messages from Verbose handler. */ #ifndef MSGHANDLER_H #define MSGHANDLER_H #include #include // /** * @brief An abstract handler for messages. */ class MsgHandler { public: /** Virtual destructor for a virtual class. */ virtual ~MsgHandler() {} /** * A message to be processed by the handler. * * @param level - What level of verbosity is associated with this message, higher number == more verbosity. * @param msg - Message that is to be processed. * @param delimiter - Should a delimiter be emitted as well? 
*/ virtual void message(int level, const std::string &msg, bool delimiter = true) = 0; /** * What level of verbosity is requested. This is a hint to the class and can * be ignored if desired. * * @param level - Level below which progress messages are printed. */ void setBaseVerbosity(int level) { m_Verbosity = level; } protected: int m_Verbosity; ///< What level of verbosity is processed, higher == more messages. }; #endif /* MSGHANDLER_H */ affxparser/src/fusion/util/MsgSocketHandler.cpp0000644000175200017520000001065414516003651022714 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2010 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // include these before someone else does #ifdef _WIN32 #include #include #endif #include "util/MsgSocketHandler.h" #include "util/SocketClient.h" #include "util/Except.h" #include "util/Err.h" #include "util/Verbose.h" #include using namespace std; MsgSocketHandler::MsgSocketHandler(int verbosity) { m_LocVerbosity = MsgHandler::m_Verbosity = ProgressHandler::m_Verbosity = verbosity; m_Client = new SocketClient(); } MsgSocketHandler::~MsgSocketHandler() { delete m_Client; } void MsgSocketHandler::checkForKill() { m_Client->checkForMsgs(m_Messages); for (size_t i = 0; i < m_Messages.size(); i++) { if ((m_Messages[i].find(SOCKETSTOP) != std::string::npos) || (m_Messages[i].find(FATALERROR) != std::string::npos)) { Err::errAbort("ApplicationTerminated"); } } } void MsgSocketHandler::openSocket(const std::string &host, const std::string &port) { m_Client->socketConnect(host, port); } void MsgSocketHandler::progressBegin(int verbosity, const std::string &msg, int total) { m_Total = total; if(verbosity <= m_LocVerbosity) { checkForKill(); map param; param["VERBOSITY"] = ToStr(verbosity); param["VALUE"] = "BEGIN"; param["TOTAL"] = ToStr(total); param["MSG"] = msg + m_EndLine; string message = SocketHandler::serializeMap(param); m_Client->sendMsg(message, SocketHandler::TEXT, SocketHandler::PROGRESS); } } void MsgSocketHandler::progressStep(int verbosity) { if(verbosity <= m_LocVerbosity) { checkForKill(); map param; param["VERBOSITY"] = ToStr(verbosity); param["VALUE"] = "STEP"; string msg = SocketHandler::serializeMap(param); m_Client->sendMsg(msg, SocketHandler::TEXT, SocketHandler::PROGRESS); } } void MsgSocketHandler::progressEnd(int verbosity, const std::string &msg) { if(verbosity <= 
m_LocVerbosity) { checkForKill(); map param; param["VERBOSITY"] = ToStr(verbosity); param["VALUE"] = "END"; param["MSG"] = msg + m_EndLine; string message = SocketHandler::serializeMap(param); m_Client->sendMsg(message, SocketHandler::TEXT, SocketHandler::PROGRESS); } } /** * A message to be processed by the handler. * * @param level - What level of verbosity is associated with this message, higher number == more verbosity. * @param msg - Message that is to be processed. * @param delimiter - Should a delimiter be emitted as well? */ void MsgSocketHandler::message(int level, const std::string &msg, bool delimiter) { if (level <= m_LocVerbosity) { checkForKill(); map param; param["VERBOSITY"] = ToStr(level); param["MSG"] = msg + m_EndLine; string message = SocketHandler::serializeMap(param); if ((message.find(SOCKETSTOP) != std::string::npos) || (message.find(FATALERROR) != std::string::npos)) { m_Client->sendMsg(message, SocketHandler::TEXT, SocketHandler::KILLMSG); } m_Client->sendMsg(message, SocketHandler::TEXT, SocketHandler::VERBOSE); } } void MsgSocketHandler::finishedMsg() { map param; param["VERBOSITY"] = "0"; param["VALUE"] = "finished"; string message = SocketHandler::serializeMap(param); m_Client->sendMsg(message, SocketHandler::TEXT, SocketHandler::COMPLETION); } /** * What level of verbosity is requested. This is a hint to the class and can * be ignored if desired. * * @param level - Level below which progress messages are printed. */ void MsgSocketHandler::setBaseVerbosity(int level) { m_LocVerbosity = level; } affxparser/src/fusion/util/MsgSocketHandler.h0000644000175200017520000001034514516003651022356 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file MsgSocketHandler.h * @author Chuck Sugnet * @date Thu Jan 21 16:26:33 PST 2010 * * @brief File for handling messages from Verbose handler to a socket */ #ifndef MSGSOCKETHANDLER_H #define MSGSOCKETHANDLER_H #include #include #include #include "util/MsgHandler.h" #include "util/ProgressHandler.h" // class SocketClient; #define SOCKETSTOP "_SOCKET_STOP_" #define FATALERROR "FATAL ERROR:" /** * @brief An abstract handler for messages. */ class MsgSocketHandler : public MsgHandler, public ProgressHandler { public: /** * Constructor * @param verbosity - Verbosity level to print at higher -> more messages */ MsgSocketHandler(int verbosity); /** * Destructor */ ~MsgSocketHandler(); /** * Check to see if handler has been sent the message to abort by server. */ void checkForKill(); /** * Open a socket for reading/writing on the specified host and port * * @param host - Ip address, often 127.0.0.1 or localhost * @param port - Which port to open */ void openSocket(const std::string &host, const std::string &port); /** * Begin a progress reporting. After this initial call the * progressStep call will be called N times where N = the parameter * total passed in below. * * @param verbosity - What level of verbosity is the progress * associated with. Higher levels of verbosity produce more * messages. * @param msg - String message associated with the beginning of this task. 
* @param total - Expected number of times that the progressStep() * will be called after this. */ virtual void progressBegin(int verbosity, const std::string &msg, int total); /** * This function is called when one unit of work has been done. In * general it is expected that the units of work should be roughly * equal in size. * * @param verbosity - At what verbosity level should this step be * displayed. */ void progressStep(int verbosity); /** * Signals the end of progress report. * * @param verbosity - Level of verbosity associated with this progress report. * @param msg - Closing message from calling function. */ void progressEnd(int verbosity, const std::string &msg); /** * A message to be processed by the handler. * * @param level - What level of verbosity is associated with this message, higher number == more verbosity. * @param msg - Message that is to be processed. * @param delimiter - Should a delimiter be emitted as well? */ void message(int level, const std::string &msg, bool delimiter = true); /** * What level of verbosity is requested. This is a hint to the class and can * be ignored if desired. * * @param level - Level below which progress messages are printed. */ void setBaseVerbosity(int level); /** * Send message indication termination of computation. */ void finishedMsg(); /** * Set end of line suffix */ void setEndOfLine(const std::string &s) { m_EndLine = s; } protected: SocketClient *m_Client; ///< Class for reading from and writing to socket. int m_LocVerbosity; ///< Our verbosity level int m_Total; ///< How many steps total are we expecting? std::vector m_Messages; ///< Buffer for messages from the socket. std::string m_EndLine; ///< Any end to tack on messages (like "\n"); }; #endif /* MSGHANDLER_H */ affxparser/src/fusion/util/MsgStream.cpp0000644000175200017520000000336014516003651021415 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file MsgStream.cpp * @author Chuck Sugnet * @date Mon Jul 10 16:30:07 2006 * * @brief File for handling messages and printing them to the stream. * * */ // #include "util/MsgStream.h" // #include "util/Err.h" #include "util/Util.h" using namespace std; /** * A message to be processed by the stream. * * @param level - What level of verbosity is associated with this message, higher number == more verbosity. * @param msg - Message that is to be processed. * @param delimiter - Should a delimiter be emitted as well? */ void MsgStream::message(int level, const std::string &msg, bool delimiter) { if(level <= m_Verbosity && m_Out != NULL) { if(!m_Out->good()) { Err::errAbort("MsgStream::message() - problem writing to stream."); } string msgMod = msg; Util::subChar(msgMod, '\t', ' '); (*m_Out) << msgMod; if(delimiter) (*m_Out) << std:: endl; } } affxparser/src/fusion/util/MsgStream.h0000644000175200017520000000347614516003651021072 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file MsgStream.h * @author Chuck Sugnet * @date Mon Jan 30 16:48:01 2006 * * @brief File for handling messages and printing them to the stream. * */ #ifndef MSGSTREAM_H #define MSGSTREAM_H #include "util/MsgHandler.h" // #include #include #include // /** * @brief An output stream for messages. */ class MsgStream : public MsgHandler { public: /** Constructor. */ MsgStream(int level, std::ostream *out=NULL) { m_Verbosity = level; m_Out = out; } /** * A message to be processed by the stream. * * @param level - What level of verbosity is associated with this message, higher number == more verbosity. * @param msg - Message that is to be processed. * @param delimiter - Should a delimiter be emitted as well? */ void message(int level, const std::string &msg, bool delimiter = true); private: std::ostream *m_Out; ///< Stream to print messages to (cout by default.) }; #endif /* MSGSTREAM_H */ affxparser/src/fusion/util/Options.cpp0000644000175200017520000001650014516003651021146 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "util/Options.h" // #include "calvin_files/utils/src/StringUtils.h" /** * Constructor */ Options::Options() { m_Options.resize(1); m_Labels["current"] = 0; } /** * Destructor */ Options::~Options() { } void Options::getOptionNames(std::vector &names, int oI){ for(size_t i=0; i < m_Options[oI].m_option_vec.size(); i++) { names.push_back(m_Options[oI].m_option_vec[i]->m_longName); } } void Options::getOptionTypes(std::vector &types, int oI) { for(size_t i=0; i < m_Options[oI].m_option_vec.size(); i++) { types.push_back(m_Options[oI].m_option_vec[i]->m_type); } } /** * Check to see if an option is defined. 
* * @param name - option name */ bool Options::isOptDefined(const std::string& name, int oI) { return (m_Options[0].findOpt(name) != NULL); } /** * Set option value * * @param name - option name * @param value - value for the option */ void Options::setOpt(const std::string &name, const std::string& value) { if (!m_Options[0].isOptDefined(name)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} m_Options[0].set(name, value); //Verbose::out(1,"Set string option " + name + " to " + value); } /** * Set option to vector of values * * @param name - option name * @param value - vector of values */ void Options::setOpt(const std::string &name, std::vector< std::string > & values) { if (!isOptDefined(name)) { Err::errAbort("Option " + name + " cannot be found in the options for this engine."); } if (values.size() != 0) { if (m_Options[0].mustFindOpt(name)->m_allowMultiple) { m_Options[0].mustFindOpt(name)->m_values = values; } else { if (values.size() > 1) {Err::errAbort("Option " + name + " does not allow multiple values.");} if (values.size() == 1) {setOpt(name, values[0]);} } } } /** * Set option to vector of vector of values * * @param name - option name * @param value - vector of vector of values */ void Options::setOpt(const std::string & name, std::vector< std::vector > & values ) { ///@todo should roll into PgOptions support for this rather than using a stand alone hack if (!isOptDefined(name)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} m_Options[0].mustFindOpt(name)->setValue(ToStr(values.size())); for (size_t i =0; i < values.size(); i++ ) { pushOpt(name, values[i]); } } /** * Add a value to a vector of values for option * * @param name - option name * @param value - value to push onto vector of values */ void Options::pushOpt( const std::string &name, const std::string & value ) { if (!isOptDefined(name)) {Err::errAbort("Option " + name + " cannot be found in the options for this 
engine.");} m_Options[0].mustFindOpt(name)->pushValue(value); } /** * Add a vector of values to the values for an option * * @param name - option name * @param value - vector of values to push */ void Options::pushOpt ( const std::string & name, std::vector< std::string > values ) { if (!isOptDefined(name)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} ///@todo should roll into PgOptions support for this rather than using a stand alone hack int nextRow = 0; while ( m_Options[0].findOpt( name + "::" + ToStr( nextRow )) != NULL ) { nextRow++; } m_Options[0].mustFindOpt(name)->setValue(ToStr(nextRow + 1)); defOptMult("", name + "::" + ToStr(nextRow),PgOpt::STRING_OPT,"",""); m_Options[0].mustFindOpt( name + "::" + ToStr(nextRow))->m_values = values; } /** * Parse and set options from argv * * @param argv - arg vector */ int Options::parseArgv( const char * const * const argv, int start ) { return m_Options[0].parseArgv( argv, start ); } PgOpt* Options::getPgOpt(const std::string& name, int oI) { if (!isOptDefined(name, oI)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} return m_Options[oI].mustFindOpt(name); } /** * Get the boolean value of an option * * @param name - the name of the option */ bool Options::getOptBool(const std::string& name, int oI) { if (!isOptDefined(name,oI)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} if ((getOpt(name,oI) != "true") && (getOpt(name,oI) != "false") && (getOpt(name,oI) != "")) { Err::errAbort("Option " + name + " must be set to either true or false."); } return m_Options[oI].getBool(name ); } /** * Get the integer value of an option * * @param name - the name of the option */ int Options::getOptInt(const std::string& name, int oI) { if (!isOptDefined(name,oI)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} return m_Options[oI].getInt(name); } /** * Get the integer 
value of an option * * @param name - the name of the option */ double Options::getOptDouble(const std::string& name, int oI) { if (!isOptDefined(name,oI)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} return m_Options[oI].getDouble(name); } /** * Return a vector of values for a particular option * * @param name - option name */ std::vector< std::string >& Options::getOptVector (const std::string &name, int oI) { if (!isOptDefined(name,oI)) {Err::errAbort("Option " + name + " cannot be found in the options snapshot for this engine.");} return m_Options[oI].mustFindOpt( name )->m_values; } std::string Options::getXMLParameterFileName(int oI) { return m_Options[oI].getXMLParameterFileName(); } std::string Options::getXMLParameterFileGuid(int oI) { return m_Options[oI].getXMLParameterFileGuid(); } /** * Get the name of the Engine */ std::string Options::getProgName(int oI) { return m_Options[oI].getProgName(); } void Options::printOptions(const std::string &prefix, int oI) { // Spit out to the log the options ///@todo kind of ugly how we are abusing PgOptions for(size_t i=0; im_longName; std::string vals; if(m_Options[oI].m_option_vec[i]->m_values.size() > 0) { vals = "'" + m_Options[oI].m_option_vec[i]->m_values[0] + "'"; for(size_t j=1; j< m_Options[oI].m_option_vec[i]->m_values.size(); j++) vals += ", '" + m_Options[oI].m_option_vec[i]->m_values[j] + "'"; } else {vals = "'" + m_Options[oI].m_option_vec[i]->getValue() + "'";} Verbose::out(2, prefix + "Option '" + name + "' = " + vals); } } affxparser/src/fusion/util/Options.h0000644000175200017520000001456314516003651020622 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Options.h * @author Alan Williams * @date Mon Jun 23 14:57:34 PDT 2008 * * @brief base class for tracking class options */ #ifndef _OPTIONS_H_ #define _OPTIONS_H_ // #include "portability/apt-win-dll.h" #include "util/Options.h" #include "util/PgOptions.h" #include "util/Util.h" // #include #include #include /** * @brief base class for tracking class options */ class APTLIB_API Options { public: /** * Constructor */ Options(); /** * Constructor * @param argv - vector of args to parse */ Options( char * argv[] ); /** * Destructor */ ~Options(); /** * Get the name of the options set */ std::string getProgName(int oI = 0); int getArgCount(int oI = 0) { return m_Options[oI].getArgCount(); } std::string getArg(int index, int oI = 0){ return m_Options[oI].getArg(index); } void getOptionNames(std::vector &names, int oI = 0); void getOptionTypes(std::vector &types, int oI = 0); std::string commandLine(int oI = 0) { return m_Options[oI].commandLine(); } void setUsage(const std::string& usage) { m_Options[0].setUsage(usage); } PgOpt* defineOption(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultVal) { return m_Options[0].defineOption(shortName, longName, type, help, defaultVal); } PgOpt* defOptMult(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultVal) { return 
m_Options[0].defOptMult(shortName, longName, type, help, defaultVal); } void defineOptionSection(const std::string §ionName) { m_Options[0].defineOptionSection(sectionName); } void optionsClear() { m_Options.resize(1); m_Options[0].clear(); } void optionUsage(std::set &hiddenOpts, bool printOpts = true, int oI = 0) { m_Options[oI].usage(hiddenOpts,printOpts); } /** * Check to see if an option is defined. * * @param name - option name */ bool isOptDefined(const std::string& name, int oI = 0); /** * Set option value * * @param name - option name * @param value - value for the option */ void setOpt(const std::string &name, const std::string& value); /** * Set option to vector of values * * @param name - option name * @param value - vector of values */ void setOpt (const std::string &name, std::vector< std::string > & values); /** * Set option to vector of vector of values * * @param name - option name * @param value - vector of vector of values */ void setOpt(const std::string & name, std::vector< std::vector > & values ); /** * Add a value to a vector of values for option * * @param name - option name * @param value - value to push onto vector of values */ void pushOpt( const std::string &name, const std::string & value ); /** * Add a vector of values to the values for an option * * @param name - option name * @param value - vector of values to push */ void pushOpt ( const std::string & name, std::vector< std::string > values ); /** * Parse and set options from argv * * @param argv - arg vector */ virtual int parseArgv( const char * const * const argv, int start = 1 ); /** * Get the underlying PgOpt containing information * * @param name - the name of the option */ PgOpt* getPgOpt(const std::string& name, int oI = 0); /** * Get the value of an option * * @param name - the name of the option */ std::string getOpt(const std::string& name, int oI = 0) { if (!isOptDefined(name, oI)) {Err::errAbort("Option " + name + " cannot be found in the options for this engine.");} 
return m_Options[oI].get( name ); } /** * Get the boolean value of an option * * @param name - the name of the option */ bool getOptBool(const std::string& name, int oI = 0); /** * Get the integer value of an option * * @param name - the name of the option */ int getOptInt(const std::string& name, int oI = 0); /** * Get the integer value of an option * * @param name - the name of the option */ double getOptDouble(const std::string& name, int oI = 0); /** * Return a vector of values for a particular option * * @param name - option name */ std::vector< std::string >& getOptVector (const std::string &name, int oI = 0); void printOptions(const std::string &prefix, int oI = 0); std::string getXMLParameterFileName(int oI = 0); std::string getXMLParameterFileGuid(int oI = 0); void setOptions(PgOptions &opts) { for(unsigned int i=0; iisSet()) m_Options[0].mustFindOpt(opts.m_option_vec[i]->m_longName)->m_values = opts.m_option_vec[i]->m_values; } int snapshotOptions() { std::string empty; return snapshotOptions(empty); } int snapshotOptions(const std::string &label){ int index = int(m_Options.size()); m_Options.push_back(m_Options[0]); if(label != "") m_Labels[label] = index; return index; }; private: /// Underlying options implementation std::vector m_Options; std::map m_Labels; }; #endif /* _OPTIONS_H_ */ affxparser/src/fusion/util/OutputMessageStream.cpp0000644000175200017520000000234714516003651023500 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "util/OutputMessageStream.h" // #include using namespace std; /* * Construct the object. */ OutputMessageStream::OutputMessageStream(int level, std::ostream *strm) { verbosity = level; outstr = strm; } /* * Write the message if the level is high enough */ void OutputMessageStream::Write(int level, const std::string &msg) { if (level <= verbosity) *outstr << msg; } affxparser/src/fusion/util/OutputMessageStream.h0000644000175200017520000000326514516003651023145 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /* \file OutputMessageStream.h Provides streaming messages to a ofstream from a component to its calling function. */ #ifndef _OutputMessageStream_HEADER_ #define _OutputMessageStream_HEADER_ #include "util/MessageStream.h" // #include #include // /*! 
Provides a base class definition for streaming back messages to the parent process. */ class OutputMessageStream : public MessageStream { private: /*! Where to stream the messages. */ std::ostream *outstr; public: /*! Construct a class with the type of output. * @param level - level of verbosity desired. * @param strm The output stream. */ OutputMessageStream(int level, std::ostream *strm = &std::cout); /*! Send a message. * @param level - level of verbosity desired. * @param msg The message to send. */ void Write(int level, const std::string &msg); }; #endif affxparser/src/fusion/util/PgOptions.cpp0000644000175200017520000006471314516003651021446 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file PgOptions.cpp * @author Chuck Sugnet & Harley Gorrell * @date Tue May 3 14:30:35 2005 * @brief Definitions for PgOptions class. 
*/ // #include "util/Convert.h" #include "util/Err.h" #include "util/PgOptions.h" #include "util/Util.h" // #include #include #include // #ifndef APT_PGOPTIONS_NO_XERCES #define XML_LIBRARY #define XERCES_STATIC_LIBRARY //#include "../../external/xerces/src/xercesc/util/PlatformUtils.hpp" #include "../../external/xerces/src/xercesc/parsers/SAXParser.hpp" #include "../../external/xerces/src/xercesc/util/OutOfMemoryException.hpp" #include "../../external/xerces/src/xercesc/sax/HandlerBase.hpp" #include "../../external/xerces/src/xercesc/util/XMLUniDefs.hpp" #include "../../external/xerces/src/xercesc/util/XMLUni.hpp" #include "../../external/xerces/src/xercesc/sax/AttributeList.hpp" #include "../../external/xerces/src/xercesc/framework/XMLPScanToken.hpp" #include "../../external/xerces/src/xercesc/util/XMLString.hpp" #include "../../external/xerces/src/xercesc/framework/LocalFileInputSource.hpp" #include "../../external/xerces/src/xercesc/framework/URLInputSource.hpp" #include "util/AffxString.h" #include "util/Fs.h" class PgOptionsSAXHandler : public XERCES_CPP_NAMESPACE::HandlerBase { private: PgOptions* m_pPgOptions; std::vector* m_pvFileNames; unsigned int m_uiOptionCount; AffxString m_strPrevCategory; AffxString m_strAnalysis; public : PgOptionsSAXHandler(PgOptions* pPgOptions, std::vector& vFileNames) : m_pPgOptions(pPgOptions), m_pvFileNames(&vFileNames), m_uiOptionCount(0) { if (m_pPgOptions == NULL) {Err::errAbort("PgOptionsSAXHandler must be constructed with a valid PgOptions pointer.");} } ~PgOptionsSAXHandler() {} unsigned int getOptionCount() const {return m_uiOptionCount;} void warning(const XERCES_CPP_NAMESPACE::SAXParseException& exc) {Verbose::out(1, "WARNING: " + toString(exc.getMessage()));} void error(const XERCES_CPP_NAMESPACE::SAXParseException& exc) {Err::errAbort(toString(exc.getMessage()));} void fatalError(const XERCES_CPP_NAMESPACE::SAXParseException& exc) {Err::errAbort(toString(exc.getMessage()));} void startDocument() {m_uiOptionCount = 
0;} void endDocument() { if (m_strAnalysis != "") { Verbose::out(1, "\tanalysis = " + m_strAnalysis); if (m_pPgOptions->mustFindOpt("analysis")->m_allowMultiple) { m_pPgOptions->push("analysis", m_strAnalysis); } else { m_pPgOptions->set("analysis", m_strAnalysis); } } } static std::string toString(const XMLCh* const in) { char* p = XERCES_CPP_NAMESPACE::XMLString::transcode(in); AffxString str = p; XERCES_CPP_NAMESPACE::XMLString::release(&p); return str.trim(); } void startElement(const XMLCh* const name, XERCES_CPP_NAMESPACE::AttributeList& attributes) { std::string strElementName = toString(name); // Verbose::out(1, "Element=" + strElementName); m_uiOptionCount++; AffxString strCategoryName; std::string strOptionName; std::string strDescription; std::string strCurrentValue; std::string strGuid; for (unsigned int iIndex = 0; (iIndex < attributes.getLength()); iIndex++) { std::string strAttributeName = toString(attributes.getName(iIndex)); std::string strAttributeType = toString(attributes.getType(iIndex)); std::string strAttributeValue = toString(attributes.getValue(iIndex)); // Verbose::out(1, "AttributeName=" + strAttributeName + ", AttributeType=" + strAttributeType + ", AttributeValue=" + strAttributeValue); if ((strElementName == "ParameterFile") && (strAttributeName == "guid")) { Verbose::out(1, "\tguid = " + strAttributeValue); if (m_pPgOptions->getXMLParameterFileGuid() == "") { m_pPgOptions->m_strXMLParameterFileGuid = strAttributeValue; } break; } else if ((strElementName == "JobOrder") && (strAttributeName == "joGUID")) { Verbose::out(1, "\tguid = " + strAttributeValue); if (m_pPgOptions->getXMLParameterFileGuid() == "") { m_pPgOptions->m_strXMLParameterFileGuid = strAttributeValue; } break; } else if (strAttributeName == "analysis") {strCategoryName = strAttributeValue;} else if (strAttributeName == "name") {strOptionName = strAttributeValue;} else if (strAttributeName == "description") {strDescription = strAttributeValue;} else if (strAttributeName 
== "currentValue") {strCurrentValue = strAttributeValue;} } if (strElementName == "Parameter") { if (strCategoryName != "") { if (!m_pPgOptions->isOptDefined("analysis")) { Verbose::out(1, "Specified option is not defined. Name: " + strOptionName); } else { if (strCategoryName != m_strPrevCategory) { if (m_strAnalysis != "") { Verbose::out(1, "\tanalysis = " + m_strAnalysis); if (m_pPgOptions->mustFindOpt("analysis")->m_allowMultiple) { m_pPgOptions->push("analysis", m_strAnalysis); } else { m_pPgOptions->set("analysis", m_strAnalysis); } } if (strCurrentValue == "") { m_strAnalysis = strOptionName; } } else { if (strCurrentValue != "") { // if (::getInt(::getInt(strCurrentValue)) == strCurrentValue) // { // m_strAnalysis += "." + strOptionName + "=" + strCurrentValue; // } // else // { m_strAnalysis += "." + strOptionName + "='" + strCurrentValue + "'"; // } } } m_strPrevCategory = strCategoryName; } } else { if (m_pPgOptions->isOptDefined(strOptionName)) { Verbose::out(1, "\t" + strOptionName + " = " + strCurrentValue); if (strOptionName == "xml-file") { m_pPgOptions->setOptionsFromXMLFile(strCurrentValue, *m_pvFileNames); } else { if (m_pPgOptions->mustFindOpt(strOptionName)->m_allowMultiple) { m_pPgOptions->push(strOptionName, strCurrentValue); } else { m_pPgOptions->set(strOptionName, strCurrentValue); } } } else { Verbose::out(1, "Specified option is not defined. Name: " + strOptionName); } } } } }; #endif // using namespace std; ////////// PgOpt::PgOpt() { // set to known starting values. 
m_shortName=""; m_longName=""; m_help=""; m_type=PgOpt::INVALID_OPT; m_defaultValue=""; m_values.resize(0); // default is no multiples m_allowMultiple=0; } PgOpt::PgOpt(const PgOpt* orig) { m_shortName= orig->m_shortName; m_longName= orig->m_longName; m_help= orig->m_help; m_type= orig->m_type; m_defaultValue= orig->m_defaultValue; m_values= orig->m_values; m_allowMultiple=orig->m_allowMultiple; } void PgOpt::clearValues() { m_values.resize(0); } void PgOpt::allowMutipleValues(int val) { m_allowMultiple=val; } std::string PgOpt::getDefaultValue() const { return m_defaultValue; } void PgOpt::resetToDefault() { clearValues(); } bool PgOpt::isSet() { return (m_values.size()!=0); } void PgOpt::pushValue(const std::string& new_value) { m_values.push_back(new_value); //if ((m_allowMultiple==0) && (m_values.size() > 1) } int PgOpt::getValueCount() const { return (int)m_values.size(); } std::vector PgOpt::getValueVector() const { return m_values; } std::string PgOpt::getValue(size_t idx) const { // off the end. if (idx<0) { Err::errAbort("Out of bounds. (idx<0)"); } // No value is set, return the defaultValue. 
if (m_values.size()==0) { return getDefaultValue(); } if (idxsize)"); return string("internal error"); } // bool PgOpt::getValueBool(int idx) const { return Convert::toBool(getValue(idx)); } int PgOpt::getValueInt(int idx) const { return Convert::toInt(getValue(idx)); } double PgOpt::getValueDouble(int idx) const { return Convert::toDouble(getValue(idx)); } void PgOpt::push_user_values_into(std::vector& dest_vec) { for (size_t i=0;i",getValue(0).c_str()); } else { int cnt=getValueCount(); for (int i=0;i::const_iterator iter; for(iter = options.m_option_section.begin(); iter != options.m_option_section.end(); iter++) { m_option_section[initial_end+iter->first] = iter->second; } } PgOptions::PgOptions(const PgOptions &options) { m_progName = options.m_progName; m_argv = options.m_argv; m_args = options.m_args; m_usageMsg = options.m_usageMsg; m_strXMLParameterFileName=options.m_strXMLParameterFileName; m_strXMLParameterFileGuid=options.m_strXMLParameterFileGuid; for(size_t i=0; i::const_iterator iter; for(iter = options.m_option_section.begin(); iter != options.m_option_section.end(); iter++) { m_option_section[iter->first] = iter->second; } } void PgOptions::setUsage(const std::string& msg) { m_usageMsg=msg; } std::string PgOptions::getUsage() { return m_usageMsg; } ////////// std::vector PgOptions::getArgVector() { return m_args; } std::string PgOptions::getArg(int idx) { return m_args[idx]; } int PgOptions::getArgCount() { return (int)m_args.size(); } std::string PgOptions::getProgName() { return m_progName; } // void PgOptions::setArgv(const char * const * const argv) { m_argv.clear(); for (const char* const * arg=argv;*arg!=NULL;arg++) { m_argv.push_back(*arg); } // if (0clearValues(); } } void PgOptions::bind(const std::string& opt_name,PgOpt* opt) { // dont add the null string. if (opt_name!="") { m_option_map[opt_name]=opt; } } PgOpt* PgOptions::addPgOpt(const PgOpt* opt) { // We didnt create this option so copy it. 
PgOpt* opt_copy=new PgOpt(opt); return addPgOpt_nocopy(opt_copy); } PgOpt* PgOptions::addPgOpt_nocopy(PgOpt* opt) { // opt->checkParseIsOk(opt->getDefaultValue()); // optionMapIterator_t i = m_option_map.find(opt->m_longName); if(i!=m_option_map.end()) { Err::errAbort("Option '" + opt->m_longName + "' already defined."); } if(!opt->m_shortName.empty()) { i = m_option_map.find(opt->m_shortName); if(i!=m_option_map.end()) { Err::errAbort("Option '" + opt->m_shortName + "' already defined."); } } // we will eventually free every opt in this vector. m_option_vec.push_back(opt); // bind(opt->m_longName,opt); bind(opt->m_shortName,opt); // return opt; } void PgOptions::defineOptionSection(const std::string §ionName) { m_option_section[(int)m_option_vec.size()] = sectionName; } PgOpt* PgOptions::defineOption(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultValue) { PgOpt* opt=new PgOpt; // copy all the data opt->m_shortName=shortName; opt->m_longName=longName; opt->m_type=type; opt->m_help=help; opt->m_defaultValue=defaultValue; opt->m_values.resize(0); // add return addPgOpt_nocopy(opt); } PgOpt* PgOptions::defOpt(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultValue) { PgOpt* opt=defineOption(shortName,longName,type,help,defaultValue); return opt; } PgOpt* PgOptions::defOptMult(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultValue) { PgOpt* opt=defineOption(shortName,longName,type,help,defaultValue); opt->allowMutipleValues(1); return opt; } void PgOptions::printStringWidth(const std::string& str, int prefix,int currentPos, int maxWidth ) { Util::printStringWidth(cout, str, prefix, currentPos, maxWidth); } void PgOptions::usage(std::set &hiddenOpts, bool printOpts) { PgOpt *opt = NULL; unsigned int maxLength = 0; 
unsigned int currentLength = 0; size_t i = 0; int extraChars = 6; cout << getUsage(); cout << "\n"; if(printOpts == true) { /* find the length of the longest option name. */ for (i = 0; im_longName) != hiddenOpts.end()) { continue; } // subtract it off if there is a character. size_t length = opt->m_longName.size(); if(maxLength < length) maxLength = (int)length; } // extraChars contains a padded space for options without a short flag, // three ' ' at the beginning and one ' ' at end. maxLength += 4 + extraChars; // cap it if(maxLength > 26) { maxLength = 26; } cout << "\noptions:\n"; /* Loop through and print out the help. */ for(i = 0; im_longName) != hiddenOpts.end()) { continue; } // we might have a short name. if(opt->m_shortName != "") { cout << " -" << opt->m_shortName << ", "; } else { cout << " "; } // we always have a long name. cout << "--" << opt->m_longName << " "; while(currentLength < maxLength) { cout.put(' '); currentLength++; } printStringWidth(opt->m_help + " [default '" + opt->m_defaultValue + "']", maxLength, currentLength); cout << "\n"; } } } int PgOptions::parseArgv(const char * const * const argv, int start) { assert(argv!=NULL); setArgv(argv); // Loop through and match options with arguments. size_t arg_idx=start; while (arg_idxresetToDefault(); } m_argv.resize(0); m_args.resize(0); } void PgOptions::dump() { for (size_t i=0;idump(); } printf("Args:\n"); for (size_t i=0;i> %3d : %s\n",*arg_idx,m_argv[*arg_idx].c_str()); // debug arg_opt=m_argv[*arg_idx]; *arg_idx+=1; // No leading "-"; Add it to the args vec... if (arg_opt[0]!='-') { m_args.push_back(arg_opt); return; } // "--" terminates options; Everything following "--" is an args. if (arg_opt=="--") { while (*arg_idxm_type!=PgOpt::BOOL_OPT) { Err::errAbort("Cant use '--no-' with '"+arg_opt_no+"': Not a boolean option."); } opt->setValue("false"); return; } // didnt find it, treat it as a normal option. 
} opt = findOpt(arg_opt); if(opt == NULL) { Err::errAbort("Don't recognize option: '" + arg_opt + "'"); } // if (*arg_idxm_type == PgOpt::BOOL_OPT) { // --foo=bar if (have_arg_val==1) { opt->checkParseIsOk(arg_val); opt->setValue(arg_val); } // we might have a following "true/false"... // "--foo true" else { if (*arg_idxsetValue(m_argv[*arg_idx]); *arg_idx+=1; } else { // didnt look like a bool, use opt->setValue("true"); } } // just "program --foo" else { opt->setValue("true"); } } // return; } // Do we take the next value? if (have_arg_val==0) { if (*arg_idxcheckParseIsOk(arg_val)==1) { if (opt->m_allowMultiple==1) { opt->pushValue(arg_val); } else { if (opt->m_longName == "xml-file") { if (opt->isSet()) {Err::errAbort("The xml-file option has already been set. Only one xml-file can be specified.");} std::vector vFileNames; setOptionsFromXMLFile(arg_val, vFileNames); } opt->setValue(arg_val); } } else { Err::errAbort("bad parse setting '" + arg_opt +"' to '" + arg_val + "'"); } } void PgOptions::clear(const std::string &name) { mustFindOpt(name)->clearValues(); } bool PgOptions::getBool(const std::string& opt_name) { PgOpt *opt = mustFindOpt(opt_name); return opt->getValueBool(0); } double PgOptions::getDouble(const std::string& opt_name) { PgOpt *opt = mustFindOpt(opt_name); return opt->getValueDouble(0); } int PgOptions::getInt(const std::string& opt_name) { PgOpt *opt = mustFindOpt(opt_name); return opt->getValueInt(0); } ////////// int PgOptions::argc() { return (int)m_argv.size(); } std::string PgOptions::argv(int idx) { return m_argv[idx]; } /** * Load parameters from an XML file. * * @param strFileName - The name of the XML file to load parameters from. 
*/ void PgOptions::setOptionsFromXMLFile(const std::string& strFileNameIn, std::vector& vFileNames) { #ifndef APT_PGOPTIONS_NO_XERCES AffxString strFileName = strFileNameIn; Verbose::out(1, "*"); Verbose::out(1, "Loading options from file: " + strFileName); if (vFileNames.size() > 7) {Err::errAbort("Possible run away recursion situation found in PgOptions::setOptionsFromXMLFile(...)");} for (int iIndex = 0; (iIndex < (int)vFileNames.size()); iIndex++) { if (Fs::basename(strFileName) == vFileNames[iIndex]) {Err::errAbort("Possible run away recursion situation found in PgOptions::setOptionsFromXMLFile(...)");} } vFileNames.push_back(Fs::basename(strFileName)); if (m_strXMLParameterFileName == "") { m_strXMLParameterFileName = strFileName; } // Initialize the XML4C system try { XERCES_CPP_NAMESPACE::XMLPlatformUtils::Initialize(); } catch (const XERCES_CPP_NAMESPACE::XMLException& toCatch) { Err::errAbort("PgOptions::setParametersFromXMLFile() failed at XMLPlatformUtils::Initialize(). Msg: " + PgOptionsSAXHandler::toString(toCatch.getMessage()) + " FileName: " + strFileName); } // // Create a SAX parser object to use and create our SAX event handlers // and plug them in. // XERCES_CPP_NAMESPACE::SAXParser* parser = new XERCES_CPP_NAMESPACE::SAXParser; PgOptionsSAXHandler handler(this, vFileNames); parser->setDocumentHandler(&handler); parser->setErrorHandler(&handler); parser->setValidationScheme(XERCES_CPP_NAMESPACE::SAXParser::Val_Auto); parser->setDoNamespaces(false); parser->setDoSchema(false); parser->setValidationSchemaFullChecking(false); // // Ok, lets do the progressive parse loop. On each time around the // loop, we look and see if the handler has found what its looking // for. When it does, we fall out then. 
// unsigned long duration; int errorCount = 0; try { // Create a progressive scan token XERCES_CPP_NAMESPACE::XMLPScanToken token; const unsigned long startMillis = XERCES_CPP_NAMESPACE::XMLPlatformUtils::getCurrentMillis(); try { XMLCh* p = XERCES_CPP_NAMESPACE::XMLString::transcode(strFileName.c_str()); try { XERCES_CPP_NAMESPACE::URLInputSource source(p); if (!parser->parseFirst(source, token)) { Err::errAbort("PgOptions::setParametersFromXMLFile() Cannot open or parse xml-file. FileName: " + strFileName); } } catch(...) { XERCES_CPP_NAMESPACE::LocalFileInputSource source(p); if (!parser->parseFirst(source, token)) { Err::errAbort("PgOptions::setParametersFromXMLFile() Cannot open or parse xml-file. FileName: " + strFileName); } } XERCES_CPP_NAMESPACE::XMLString::release(&p); // // We started ok, so lets call scanNext() until we find what we want // or hit the end. // bool gotMore = true; while (gotMore && !parser->getErrorCount()) { gotMore = parser->parseNext(token); } const unsigned long endMillis = XERCES_CPP_NAMESPACE::XMLPlatformUtils::getCurrentMillis(); duration = endMillis - startMillis; errorCount = parser->getErrorCount(); // // Reset the parser-> In this simple progrma, since we just exit // now, its not technically required. But, in programs which // would remain open, you should reset after a progressive parse // in case you broke out before the end of the file. This insures // that all opened files, sockets, etc... are closed. // parser->parseReset(token); } catch (const XERCES_CPP_NAMESPACE::XMLException& toCatch) { Err::errAbort("PgOptions::setParametersFromXMLFile() failed with an XMLException. Msg: " + PgOptionsSAXHandler::toString(toCatch.getMessage()) + " FileName: " + strFileName); } catch(...) {Err::errAbort("PgOptions::setParametersFromXMLFile() Exception thrown while parsing xml-file. 
FileName: " + strFileName);} } catch (const XERCES_CPP_NAMESPACE::OutOfMemoryException&) { delete parser; XERCES_CPP_NAMESPACE::XMLPlatformUtils::Terminate(); Err::errAbort("PgOptions::setParametersFromXMLFile() failed with an OutOfMemoryException. FileName: " + strFileName); } catch (const XERCES_CPP_NAMESPACE::XMLException& toCatch) { delete parser; XERCES_CPP_NAMESPACE::XMLPlatformUtils::Terminate(); Err::errAbort("PgOptions::setParametersFromXMLFile() failed with an XMLException. Msg: " + PgOptionsSAXHandler::toString(toCatch.getMessage()) + " FileName: " + strFileName); } // Verbose::out(1, "XMLFileName = " + strFileName + ", OptionCount = " + ToStr(handler.getOptionCount())); Verbose::out(1, "*"); delete parser; XERCES_CPP_NAMESPACE::XMLPlatformUtils::Terminate(); vFileNames.pop_back(); #endif } affxparser/src/fusion/util/PgOptions.h0000644000175200017520000003736214516003651021113 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file PgOptions.h * @author Chuck Sugnet && harley * @date Tue May 3 11:49:37 2005 * @brief Classes for reading program command line options. 
*/ /* @page PgOptions MANUAL: PgOptions (NON-OFFICIAL-RELEASE) @section PgOptionsContents Contents @section PgOptionsIntro Introduction PgOptions is the APT library for working with command line options. With this library, a program can declare the list of options it supports along with their kind and defaults. PgOptions will process the command line and then the program can query what was set. Like standard unix programs, an option of "--" ends option parsing. All the remaning arguments are args to the program. There are three kinds of options: BOOL, DOUBLE and STRING. All three kinds may have a value appended to the option name with "=". (Like: "--double-example=10.5", "--output-dir=foo".) BOOL values may have a prefix of "-no-" or "--no-" to set the value to "false". If the arg after a bool option is "true/1" or "false/0" it is used as the value. Otherwise the option is set to true. (Note that this might make working with a file called "true" ambiguous. In that case use "--" to seperate the args from the option.) Non-option args to the program are put into the "args" list which the program can retreive with "getArg(idx)". @section PgOptionsExample Example Normal usage looks like: @verbatim int main (int argc,char* argv[]) { PgOptions opts; opts->setUsage("A nice paragraph about what this program does."); opts->defineOption("p","print",PgOpt::BOOL_OPT, "Do you want to print the output?", "true"); opts->parseArgv(argv); if (opts->getBool("print")) { doPrint(); } } @endverbatim @section PgOptionsNotes Notes * Please try and write the options in the "positive sense". If the default is to print, then define the option "print" to be true. The user can use "--no-print" when they dont want to. (Dont define "--no-print".) */ #ifndef PGOPTIONS_H #define PGOPTIONS_H // "APT_PGOPTIONS_NO_XERCES" is used to remove the use of // Xerces XML functions in PgOptions. // affy/exact uses PgOptions, but has no need of XML. 
// providing an option to disable // #include "portability/apt-win-dll.h" #include "util/Err.h" // #include #include #include #include #include /// @class PgOpt /// @brief The definition of this option and the values. class APTLIB_API PgOpt { public: /** Types of allowed options. */ typedef enum PgOptType { INVALID_OPT, BOOL_OPT, DOUBLE_OPT, INT_OPT, STRING_OPT, } PgOptType_t; public: std::string m_shortName; ///< Short name without the '-' (-h -> h). std::string m_longName; ///< Long name without the '--' prefix. // std::string m_help; ///< Maybe help, possibly NULL. PgOptType m_type; ///< Type of option. std::string m_defaultValue; ///< Default value if any. std::vector m_values; ///< Values for this option. int m_allowMultiple; ///< Allow mutiple args //int m_numargs; /// @brief Creates and inits a new PgOpt PgOpt(); PgOpt(const PgOpt* orig); /// @brief Allows mutiple values for this option? /// @param val 1=yes 0=no void allowMutipleValues(int val); /// @brief was this option set by the user? /// @return true if it was set, false if it is the default value. bool isSet(); /// @brief The number of values for this option. /// @return int int getValueCount() const; /// @brief Get the string value of this option /// @param idx (optional) the index /// @return string value std::string getValue(size_t idx=0) const; /// @brief Get the value as a boolean. Abort if it cant be converted. /// @param idx (optional) the index /// @return boolean bool getValueBool(int idx=0) const; /// @brief Get the value as an int. Abort if it cant be converted. /// @param idx (optional) the index /// @return the value as an int int getValueInt(int idx=0) const; /// @brief Get the value as a double. Abort if it cant be converted. 
/// @param idx (optional) the index /// @return the value as a double double getValueDouble(int idx=0) const; /// @brief The default value of this option /// @return string std::string getDefaultValue() const; /// @brief Clear all the values on this option void clearValues(); /// @brief Set the value of this option to a single value. /// @param new_value void setValue(const std::string &new_value) { m_values.resize(0); pushValue(new_value); } /// @brief Push the new value onto the list of values for this option. /// @param new_value /// @remarks only valid for options which allow mutiple values. void pushValue(const std::string& new_value); /// @brief same as clearValues() void resetToDefault(); std::vector getValueVector() const; /// @brief Check to see if the parse of the string is ok for this option. /// @param value string to parse. /// @return 1 if ok. int checkParseIsOk(const std::string& value) const; /// @brief Push all our user supplied values into another vector. /// @param dest_vec vector to add them to. void push_user_values_into(std::vector& dest_vec); /// @brief Dump the internal state of the option for debugging. void dump() const; }; ////////// ////////// #ifndef APT_PGOPTIONS_NO_XERCES class PgOptionsSAXHandler; #endif /// @brief class APTLIB_API PgOptions { #ifndef APT_PGOPTIONS_NO_XERCES friend class PgOptionsSAXHandler; #endif public: typedef std::map optionMap_t; typedef optionMap_t::iterator optionMapIterator_t; public: ///< Helpful message explaining the program and purpose. std::string m_usageMsg; ///< Vec of all the options defined. std::vector m_option_vec; ///< Map of indexes for option section starts std::map m_option_section; ///< Map option names to PgOpt optionMap_t m_option_map; ///< name of this program. std::string m_progName; ///< A copy argv we were given. std::vector m_argv; ///< Non option arguments. 
std::vector m_args; // XML Parameter file name std::string m_strXMLParameterFileName; // XML Parameter file guid std::string m_strXMLParameterFileGuid; public: /// @brief Creates a new PgOptions object PgOptions(); /// @brief Creates a new PgOptions from an existing one. PgOptions(const PgOptions &options); /// @brief Destroys a PgOptions object and its PgOpts. ~PgOptions(); /// @brief clear all the data & free memory. void clear(); /// @brief appendOptions void appendOptions(const PgOptions& options); /// @brief Assignment PgOptions& operator=(const PgOptions &options); /// @brief Get the usage info /// @return string std::string getUsage(); /// @brief set the usage info for this program /// @param usage void setUsage(const std::string& usage); /// @brief define an option to be handled by this option parser. /// @param shortName one or two letter option name. /// @param longName full length option name. /// @param type the type of option (string,int,bool,double) /// @param help help text. /// @param defaultVal Default value to use if not set. /// @return pointer to the PgOpt created. /// @remarks This calls addPgOpt to add the created option to the option state. PgOpt* defineOption(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultVal); /// @brief Short form of defineOption /// @param shortName /// @param longName /// @param type /// @param help /// @param defaultVal /// @return PgOpt* defOpt(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultVal); /// @brief Short for to define an option with multiple values. 
/// @param shortName /// @param longName /// @param type /// @param help /// @param defaultVal /// @return PgOpt* defOptMult(const std::string& shortName, const std::string& longName, PgOpt::PgOptType_t type, const std::string& help, const std::string& defaultVal); /// @brief Define a new section for options /// @param sectionName name for the section /// @return void void defineOptionSection(const std::string §ionName); /// @brief Copies and adds an PgOpt to the internal state. /// @param option option to add /// @return pointer to copy. PgOpt* addPgOpt(const PgOpt *option); /// @brief Adds an PgOpt to the internal state. /// @param option option to add /// @return pointer to the passed in option PgOpt* addPgOpt_nocopy(PgOpt *option); /// @brief Add the option name and PgOpt to our map of name,value pairs /// @param opt_name name to bind. null strings ("") are ignored. /// @param opt option for this name. void bind(const std::string& opt_name,PgOpt* opt); /// @brief Finds the option which is bound to this name. /// @param name /// @return pointer to option or NULL PgOpt* findOpt(const std::string &longName) { optionMapIterator_t i = m_option_map.find(longName); if (i == m_option_map.end()) { return NULL; } return i->second; } /// @brief Same as findOpt, but aborts if not option is not found. /// @param name /// @return pointer to option PgOpt* mustFindOpt(const std::string &name) { PgOpt *opt = findOpt(name); if(opt == NULL) { Err::errAbort("Don't recognize option with name: '" + name + "'."); } return opt; } /// @brief Print a string wrapping at max width from the current position. /// @param str - The cstring to be printed. /// @param prefix - How many spaces to put on begining of newline. /// @param maxWidth - Where to wrap text at. /// @param currentPos - What position in the line is cursor currently at. 
static void printStringWidth(const std::string& str, int prefix=0,int currentPos=0, int maxWidth=70); /// @brief Print out a litte ditty about program and its usage. /// @param printOpts - Print out options and help for each one? void usage(bool printOpts = true) { std::set toHide; usage(toHide, printOpts); } /// Print out a litte ditty about program and its usage. /// @param hiddenOpts - Set containing the long name of options to hide. /// @param printOpts - Print out options and help for each one? void usage(std::set &hiddenOpts, bool printOpts = true); /// @brief Match the command line arguments from /// @param argv Arguments supplied to program. argv[0] is the program name. /// @param start The position to start parsing at (default 1) /// @return the last position in argv parsed (will stop at "--") int parseArgv(const char * const * const argv, int start = 1); /// @brief the args to m_arg /// @param argv Arguments to copy void setArgv(const char * const * const argv); /// @brief Match the arg found at argv[arg_idx]. /// @param arg_idx arg_idx is updated to point to the next arg to match. void matchOneArg(size_t* arg_idx); /// @brief Clear the values for an option /// @param name the name of the option void clear(const std::string &name); /// @brief Set the value of an option /// @param name the name of the option /// @param new_value the value of the option void set(const std::string &name, const std::string &new_value) { if ((isOptDefined("xml-file")) && (name == "xml-file")) { if (mustFindOpt(name)->isSet()) { Err::errAbort("The xml-file option has already been set. 
Only one xml-file can be specified."); } std::vector vFileNames; setOptionsFromXMLFile(new_value, vFileNames); } mustFindOpt(name)->setValue(new_value); } /// @brief Add a value to an option vector /// @param name the name of the option /// @param value the value to push void push(const std::string &name, const std::string &value) { mustFindOpt(name)->pushValue(value); } /// @brief Get the value of the option as a string. /// @param opt_name option name (short or long) /// @return the value. Abort if not found. std::string get(const std::string& opt_name) { PgOpt *opt = mustFindOpt(opt_name); return opt->getValue(0); } /// @brief Get the value of the option as a boolean. /// @param opt_name option name (short or long) /// @return the value. Abort if not found. bool getBool(const std::string& opt_name); /// @brief Get the value of the option as a double. /// @param opt_name option name (short or long) /// @return the value. Abort if not found. double getDouble(const std::string& opt_name); /// @brief Get the value of the option as a int. /// @param opt_name option name (short or long) /// @return the value. Abort if not found. int getInt(const std::string& opt_name); /// @brief The number of left over args /// @return int int getArgCount(); /// @brief Get the IDXth arg. /// @param index index of arg to get. /// @return string std::string getArg(int index); /// @brief Get the args as a vector. /// @return vector of args. std::vector getArgVector(); /// @brief The name of the program (argv[0]) /// @return string std::string getProgName(); /// @brief same as clearValues void resetToDefaults(); /// @brief clear all the values from the PgOpts and internal state. void clearValues(); /// @brief A space seperated list of the argv we were given. /// @return string std::string commandLine(); /// @brief The size of the argv we were inited with. /// @return the size of argv int argc(); /// @brief Get the idxth entry in the argv which was supplied. /// @param idx the index. 
/// @return the entry. std::string argv(int idx); /// @brief Dump the internal state for debugging. void dump(); bool isOptDefined(const std::string& name) {return (findOpt(name) != NULL);} std::string getXMLParameterFileName() {return m_strXMLParameterFileName;} std::string getXMLParameterFileGuid() {return m_strXMLParameterFileGuid;} protected: void setOptionsFromXMLFile(const std::string& strFileName, std::vector& vFileNames); }; #endif /* PGOPTIONS_H */ affxparser/src/fusion/util/Progress.h0000644000175200017520000000311614516003651020763 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /* \file Progress.h Provides a base class for displaying progress. */ #ifndef _Progress_HEADER_ #define _Progress_HEADER_ /*! Provides a base class definition for streaming back messages to the parent process. */ class Progress { public: /*! Sets the upper limit of the range of the progress display. * @param level - level of verbosity desired. * @param mx The maximum number of steps in the task. * @param inc The number of steps performed before the progress display is updated. */ virtual void SetStepProperties(int level, int mx, int inc) = 0; virtual ~Progress() { } /*! 
Steps the progress ahead by one count. * @param level - What level of verbosity this message should be printed at. */ virtual void Step(int level) = 0; }; #endif affxparser/src/fusion/util/ProgressDot.h0000644000175200017520000000707314516003651021440 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file ProgressDot.h * @author Chuck Sugnet * @date Fri Jan 27 14:57:49 2006 * * @brief ProgressHandler for the console that prints dots to the command line * to signify progress. */ #ifndef PROGRESSDOT_H #define PROGRESSDOT_H // #include "util/ProgressHandler.h" #include "util/Convert.h" // #include #include #include #include #include #include // /** * @brief Prints dots as progress is made. */ class ProgressDot : public ProgressHandler { public: /** * Constructor that takes an stream for doing outputs and level at * which to print messages. * * @param out - * @param verbosity * * @return */ ProgressDot(int verbosity, std::ostream *out) { m_Total=0; m_Verbosity = verbosity; m_Out = out; } /** * Begin a progress reporting. After this initial call the * progressStep call will be called N times where N = the parameter * total passed in below. 
* * @param verbosity - What level of verbosity is the progress * associated with. Higher levels of verbosity produce more * messages. * @param msg - String message associated with the beginning of this task. * @param total - Expected number of times that the progressStep() * will be called after this. */ void progressBegin(int verbosity, const std::string &msg, int total) { m_Total = total; // flush any C io before doing our IO fflush(NULL); m_StartTime = time(NULL); if(verbosity <= m_Verbosity && m_Out != NULL) { (*m_Out) << msg; m_Out->flush(); } } /** * This function is called when one unit of work has been done. In * general it is expected that the units of work should be roughly * equal in size. * * @param verbosity - At what verbosity level should this step be * displayed. */ void progressStep(int verbosity) { if(verbosity <= m_Verbosity && m_Out != NULL) { m_Out->put('.'); m_Out->flush(); } } /** * Signals the end of progress report. * * @param verbosity - Level of verbosity associated with this progress report. * @param msg - Closing message from calling function. */ void progressEnd(int verbosity, const std::string &msg) { time_t endTime = time(NULL); int t = int( (float)(endTime - m_StartTime) / 60.0 * 100); // convert to minutes if(verbosity <= m_Verbosity && m_Out != NULL) { (*m_Out) << msg << " (" << ToStr((float)t/100) << " min)" << std::endl; m_Out->flush(); } } private: std::ostream *m_Out; ///< Stream to print messages to. Can be NULL to indicate no printing. int m_Total; ///< How many steps total are we expecting? time_t m_StartTime; }; #endif /* PROGRESSDOT_H */ affxparser/src/fusion/util/ProgressHandler.h0000644000175200017520000000601414516003651022261 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file ProgressHandler.h * @author Chuck Sugnet * @date Fri Jan 27 14:31:52 2006 * * @brief Abtract interface for a class which interprets progress. Register your * ProgressHandler with the Verbose class to handle progress calls. * */ #ifndef PROGRESSHANDLER_H #define PROGRESSHANDLER_H #include #include // /** * @brief An abstract class for handling Progress updates. */ class ProgressHandler { public: /** Virtual destructor for a virtual class. */ virtual ~ProgressHandler() {} /** Does this object handle all the calls or just want signals every dotMod */ virtual bool handleAll() { return false; } /** * Begin a progress reporting. After this initial call the * progressStep call will be called N times where N = the parameter * total passed in below. * * @param verbosity - What level of verbosity is the progress * associated with. Higher levels of verbosity produce more * messages. * @param msg - String message associated with the beginning of this task. * @param total - Expected number of times that the progressStep() * will be called after this. */ virtual void progressBegin(int verbosity, const std::string &msg, int total) = 0; /** * This function is called when one unit of work has been done. 
In * general it is expected that the units of work should be roughly * equal in size. * * @param verbosity - At what verbosity level should this step be * displayed. */ virtual void progressStep(int verbosity) = 0; /** * Signals the end of progress report. * * @param verbosity - Level of verbosity associated with this progress report. * @param msg - Closing message from calling function. */ virtual void progressEnd(int verbosity, const std::string &msg) = 0; /** * What level of verbosity is requested. This is a hint to the class and can * be ignored if desired. * * @param level - Level below which progress messages are printed. */ virtual void setBaseVerbosity(int level) { m_Verbosity = level; } protected: int m_Verbosity; ///< What level of verbosity is processed, higher == more messages. }; #endif /* PROGRESSHANDLER_H */ affxparser/src/fusion/util/RowFile.cpp0000644000175200017520000002627514516003651021074 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file RowFile.cpp * @author Chuck Sugnet * @date Mon May 23 11:26:39 2005 * @brief Class for dealing with line oriented files. 
*/ // #include "util/RowFile.h" // #include "util/Convert.h" #include "util/Err.h" #include "util/Fs.h" #include "util/Util.h" // #include #include #include #include #include using namespace std; /** * Constructor. Open an object with delimiter and comment * characters. * @param delim - word delimiter character. * @param comment - line comment character. */ RowFile::RowFile(char delimChar, char commentChar, char commentChar2): m_Buffer(NULL), m_BufferSize(0), m_LineIx(0), m_Delim(delimChar), m_Comment(commentChar), m_Comment2(commentChar2), m_Reuse(false) { } /** * Open file fileName or die trying. * @param char * fileName - full path of fileName to be opened. */ void RowFile::open(const std::string& fileName) { m_FileName = fileName; Fs::aptOpen( m_In, fileName, ios_base::in); if(m_In.fail()) { Err::errAbort("Can't open file "+FS_QUOTE_PATH(fileName) +" to read."); } m_EndType = determineLineEndingType(m_In); } /** * Destructor. */ RowFile::~RowFile() { if(m_Buffer) { delete[] m_Buffer; } if(m_In.is_open()) { m_In.close(); } }; /** * Close file streams. */ void RowFile::close() { m_In.close(); } /** * Pull the next line from the input m_Buffer or die trying. * * @return - Pointer to string representation of current line or NULL * at EOF. */ const string *RowFile::nextLine() { /* If we're reusing this line just return it again. */ if(m_Reuse) { m_Reuse = false; return &m_CurrentLine; } /* if end of file return NULL. */ if(m_In.eof()) return NULL; /* Check stream state. */ if(m_In.fail()) Err::errAbort("Something bad happened before line: " + ToStr(m_LineIx)); /* Little logic to handle the different line endings possible. 
*/ if(m_EndType == MAC) { getline(m_In, m_CurrentLine, '\r'); } else { getline(m_In, m_CurrentLine, '\n'); // if ms-dos chop off the '\r' if(m_EndType == DOS && m_CurrentLine.length() > 0) m_CurrentLine.erase(m_CurrentLine.end() - 1); } Util::trimString(m_CurrentLine); m_LineIx++; return &m_CurrentLine; } /** * Return the next line that is non-blank and doesn't start with a * comment character. * @return - Pointer to string repesentation of current line or NULL * at EOF. */ const string *RowFile::nextRealLine() { unsigned int notWhiteSpace = 0; while( nextLine() != NULL) { notWhiteSpace = 0; // Skip the first not white space. while(notWhiteSpace < m_CurrentLine.size() && isspace(m_CurrentLine[notWhiteSpace])) notWhiteSpace++; // If found we found a line with characters that doesn't start // with a comment return true. if(m_CurrentLine.size() != 0 && m_CurrentLine[notWhiteSpace] != m_Comment && m_CurrentLine[notWhiteSpace] != m_Comment2) return &m_CurrentLine; } // never found a real line. return NULL; } /** * Chop the next row into words and return them in the vector. * @param words - vector to fill in with words. * @return bool - false at EOF true otherwise. */ bool RowFile::nextRow(std::vector &words) { int len = 0, start = 0, next = 0; if(nextRealLine() == NULL) return false; words.clear(); len = m_CurrentLine.size(); while(start < len) { next = (int)m_CurrentLine.find(m_Delim, start); if(next < 0) { next = m_CurrentLine.size(); // entire string. } words.push_back(m_CurrentLine.substr(start, next - start)); start = next+1; } return true; } /** * Chop the next row into words and return them in the * vector. char* memory in vector is owned elsewhere and will * change after an calls to nextRow or nextLine. * @param words - vector to fill in with words. @return bool - * false at EOF true otherwise. 
*/ bool RowFile::nextCStringRowExpect(std::vector &words, unsigned int expect) { if(!nextCStringRow(words)) return false; if(words.size() != expect) { Err::errAbort("Got " + ToStr(words.size()) + " words instead of " + ToStr(expect) + " expected at line: " + ToStr(m_LineIx)); } return true; } /** * Chop the next row into words and return them in the * vector. char* memory in vector is owned elsewhere and will * change after an calls to nextRow or nextLine. * @param words - vector to fill in with words. @return bool - * false at EOF true otherwise. */ bool RowFile::nextCStringRow(std::vector &words) { int len = 0, start = 0, next = 0; char *mark = 0; char *buff = 0; if(nextRealLine() == NULL) return false; words.clear(); len = m_CurrentLine.size(); /* Use the m_Buffer to copy strings into to avoid lots of new's and deletes. Only expand when necessary. */ if(len + 1 > m_BufferSize) { FreezArray(m_Buffer); m_BufferSize = len+1; m_Buffer = new char[m_BufferSize]; } //strncpy(m_Buffer, m_CurrentLine.c_str(), len); memcpy(m_Buffer, m_CurrentLine.c_str(), len); buff = m_Buffer; /* Trim off any delimiters at beginning and end of line */ while(start < len) { if(!isspace(m_CurrentLine[start])) break; start++; } while(len > start) { if(!isspace(m_CurrentLine[len -1])) break; m_Buffer[len - 1] = '\0'; len--; } /* Loop through and cut up lines into words based on delimiter. */ while(start < len) { next = (int)m_CurrentLine.find(m_Delim, start); if(next < 0) { next = m_CurrentLine.size(); // entire string. } mark = buff+next; *mark = '\0'; words.push_back(buff+start); start = next+1; } return true; } /** * Chop the next row into words and return them in the vector * squawk and die if expected number was different from number * found. * @param words - vector to fill in with words. * @param expect - number of words that should be in next row. * @return bool - false at EOF true otherwise. 
*/ bool RowFile::nextRowExpect(std::vector &words, unsigned int expect) { if(!nextRow(words)) return false; if(words.size() != expect) { Err::errAbort("Got " + ToStr(words.size()) + " words instead of " + ToStr(expect) + " expected at line: " + ToStr(m_LineIx)); } return true; } void RowFile::readHeader(RowFile &rf, std::map > &header, std::vector &lines) { char delim = '='; int startPos = 0; string::size_type delimPos = 0; const string *line = NULL; while((line = rf.nextLine()) != NULL) { startPos = 0; /* Header is prefixed by '%' or "#%" */ if(line->length() == 0) continue; else if(line->length() > 1 && (*line)[0] == rf.m_Comment && (*line)[1] == rf.m_Comment2) startPos = 2; else if(line->length() > 0 && (*line)[0] == rf.m_Comment2) startPos = 1; else { rf.reuseLine(); break; } lines.push_back(string(*line)); delimPos = line->find(delim); if(delimPos == string::npos) Err::errAbort("Couldn't find delimiter: '" + ToStr(delim) + "' in line:\n" + string(*line)); string key = line->substr(startPos, delimPos-startPos); string value = line->substr(delimPos+1, line->length()); if(header.find(key) == header.end()) { // first time seen, create a new vector of values. vector vals; vals.push_back(value); header[key] = vals; } else { vector &vals = header[key]; vals.push_back(value); } } } /** * Write out a header in #%key=value form. */ void RowFile::writeHeader(std::ostream &out, const std::vector &lines) { typedef vector::const_iterator vecI; vecI i; for(i = lines.begin(); i != lines.end(); ++i) out << *i << endl; } /** * Read in a matrix from a file. * @param fileName - Name of file to read from. * @param matrix - where to read data into. * @param skipLines - lines (possibly header?) to skip. * @param skipCols - number of columns (row names?) to skip. 
*/ void RowFile::matrixFromFile(const std::string& fileName, std::vector< std::vector > &matrix, unsigned int skipLines, unsigned int skipCols) { RowFile rf; unsigned int i = 0; vector words; rf.open(fileName.c_str()); for(i = 0; i < skipLines; i++) rf.nextRow(words); while(rf.nextRow(words)) { if(skipCols >= words.size()) Err::errAbort("RowFile::matrixFromFile() - Number of skipCols >= number of cols."); matrix.push_back(vector()); for(i = skipCols; i < words.size(); i++) { matrix[matrix.size() -1].push_back(Convert::toDouble(words[i].c_str())); } } } /** * Read in a matrix from a file. * @param fileName - Name of file to read from. * @param matrix - where to read data into. * @param skipLines - lines (possibly header?) to skip. * @param skipCols - number of columns (row names?) to skip. */ void RowFile::matrixFromFile(const std::string& fileName, std::vector< std::vector > &matrix, unsigned int skipLines, unsigned int skipCols) { RowFile rf; unsigned int i = 0; vector words; rf.open(fileName.c_str()); for(i = 0; i < skipLines; i++) rf.nextRow(words); while(rf.nextRow(words)) { if(skipCols >= words.size()) Err::errAbort("RowFile::matrixFromFile() - Number of skipCols >= number of cols."); matrix.push_back(vector()); for(i = skipCols; i < words.size(); i++) { matrix[matrix.size() -1].push_back(Convert::toFloat(words[i].c_str())); } } } /** * What type of line endings do we see in this input stream? * @param input - stream to look for '\r','\n', or '\r\n' in. * @return type of line endings we expect in the future. */ enum RowFile::FileLineEnding RowFile::determineLineEndingType(std::ifstream &input) { FileLineEnding ending = UNKNOWN; while(input.good()) { char c1 = input.get(); // '\r' alone indicates mac, '\r\n' indicates dos if(c1 == '\r') { if(input.good() && input.get() == '\n') { ending = DOS; } else { ending = MAC; } break; } // is it a unix file? else if(c1 == '\n') { ending = UNIX; break; } } // reset stream back to beginning of file. 
input.seekg(0); return ending; } affxparser/src/fusion/util/RowFile.h0000644000175200017520000001617314516003651020535 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file RowFile.h * @author Chuck Sugnet * @date Mon May 23 11:27:16 2005 * * @brief Class for dealing with line oriented files. */ #ifndef ROWFILE_H #define ROWFILE_H #include #include #include #include #include // /** * RowFile * @brief Class for reading line oriented files and parsing them * out into vectors of strings easily. */ class RowFile { public: /** Line endings are platform specific, this encodes different types we know of. */ enum FileLineEnding { UNIX = 0, // '\n' DOS = 1, // '\r\n' MAC = 2, // '\r' UNKNOWN = 3 // ??? }; /** * Constructor. Open an object with delimiter and comment * characters. * @param delimChar - word delimiter character. * @param commentChar - line comment character. * @param commentChar2 - second line comment character. */ RowFile(char delimChar = '\t', char commentChar = '#', char commentChar2 = '%'); /** * @brief Destructor. */ ~RowFile(); /** * Open file fileName or die trying. * @param fileName - full path of fileName to be opened. 
*/ void open(const std::string& fileName); /** * Close file streams. */ void close(); /** * Chop the next row into words and return them in the vector * squawk and die if expected number was different from number * found. * @param words - vector to fill in with words. * @param expect - number of words that should be in next row. * @return bool - false at EOF true otherwise. */ bool nextRowExpect(std::vector &words, unsigned int expect); /** * Chop the next row into words and return them in the vector * squawk and die if expected number was different from number * found. char * memory in vector is owned elsewhere and will * change after an calls to nextRow or nextLine. * @param words - vector to fill in with words. * @param expect - number of words that should be in next row. * @return bool - false at EOF true otherwise. */ bool nextCStringRowExpect(std::vector &words, unsigned int expect); /** * Chop the next row into words and return them in the * vector. char* memory in vector is owned elsewhere and will * change after an calls to nextRow or nextLine. * @param words - vector to fill in with words. @return bool - * false at EOF true otherwise. */ bool nextCStringRow(std::vector &words); /** * Chop the next row into words and return them in the vector. * @param words - vector to fill in with words. * @return bool - false at EOF true otherwise. */ bool nextRow(std::vector &words); /** * Set the delimiter. * @param nDelim - new word delimiter. */ void setDelim(char nDelim) { m_Delim = nDelim; } /** * Get the delimiter. * @return current word delimiter. */ char getDelim() { return m_Delim; } /** * Set the comment. * @param nComment - new comment character. */ void setComment(char nComment) { m_Comment = nComment; } /** * Get the comment. * @return current comment character. */ char getComment() { return m_Comment; } /** * Pull the next line from the input buffer or die trying. 
*/ const std::string *nextLine(); /** * Return the next line that is non-blank and doesn't start with a * comment character. */ const std::string *nextRealLine(); /** * Reuse the same line again. */ void reuseLine() { m_Reuse = true; } /** * @brief What line number are we reading? * @return Line number. */ int getCurrentLineNumber() { return m_LineIx; } /** * Fill in a header map with data from the top of the file. */ static void readHeader(RowFile &rf, std::map > &header, std::vector &lines); /** * Write out a header in #%key=value form. */ static void writeHeader(std::ostream &out, const std::vector &lines); /** * Read in a matrix from a file. * @param fileName - Name of file to read from. * @param matrix - where to read data into. * @param skipLines - lines (possibly header?) to skip. * @param skipCols - number of columns (row names?) to skip. */ static void matrixFromFile(const std::string& fileName, std::vector< std::vector > &matrix, unsigned int skipLines=0, unsigned int skipCols=0); /** * Read in a matrix from a file. * @param fileName - Name of file to read from. * @param matrix - where to read data into. * @param skipLines - lines (possibly header?) to skip. * @param skipCols - number of columns (row names?) to skip. */ static void matrixFromFile(const std::string& fileName, std::vector< std::vector > &matrix, unsigned int skipLines=0, unsigned int skipCols=0); /** * @brief What type of file is this one? * @return Type of file that has been determined. */ inline enum FileLineEnding getFileType() { return m_EndType; } /** * What is the name of the rowfile we're reading from? * @return name of file being read. */ std::string getFileName() { return m_FileName; } private: /** * What type of line endings do we see in this input stream? * @param input - stream to look for '\r','\n', or '\r\n' in. * @return type of line endings we expect in the future. 
*/ enum RowFile::FileLineEnding determineLineEndingType(std::ifstream &input); /// Name of file we are reading from. std::string m_FileName; /// Stream that we are reading from. std::ifstream m_In; /// Current line read from the file. std::string m_CurrentLine; /// Utility buffer for chopping into char *s, faster than strings. char *m_Buffer; /// Current size of the buffer. int m_BufferSize; /// Line number in file that is currently read. int m_LineIx; /// Separating delimiter, usually tab ('\t') or comma (',') char m_Delim; /// Comment character usually '#' char m_Comment; /// Secondary comment character, usually '%' char m_Comment2; /// Should we reuse the last line again? bool m_Reuse; /// What type of line endings does this stream use? FileLineEnding m_EndType; }; #endif /* ROWFILE_H */ affxparser/src/fusion/util/SQLite.cpp0000644000175200017520000002156414516003651020662 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SQLite.cpp * @author Walt Short * @date Fri Sept 28 15:24:08 2007 * * @brief Wrapper for SQLite. 
* */ // #include "util/SQLite.h" // #include "util/AffxConv.h" #include "util/AffxString.h" #include "util/Fs.h" #include "util/Verbose.h" // #include // A test function. void SQLiteDatabase::test() { // Sample usage code follows... std::string strFileName = "sqlite3.txt"; SQLiteDatabase db; try { Verbose::out(1, "*"); Verbose::out(1, "Create/Open database: sqlite3.txt"); db.open(strFileName); SQLiteRecordset rset(db); Verbose::out(1, "create table Test1 (Id int primary key, Name varchar(255))"); db.execute("create table Test1 (Id int primary key, Name varchar(255))"); Verbose::out(2, "create table Test1 (Id int primary key, Name varchar(255))"); db.execute("create table Test2 (Id int primary key, Name varchar(255))"); Verbose::out(1, "Begin transaction..."); db.beginTransaction(); for (int iIndex = 1; (iIndex < 10); iIndex++) { std::string strSQL = "insert into Test1 values (" + ::getInt(iIndex) + ", 'Test1-" + ::getInt(iIndex) + "')"; Verbose::out(1, "\t" + strSQL); db.execute(strSQL); } for (int iIndex = 1; (iIndex < 10); iIndex++) { std::string strSQL = "insert into Test2 values (" + ::getInt(iIndex) + ", 'Test2-" + ::getInt(iIndex) + "')"; Verbose::out(1, "\t" + strSQL); db.execute(strSQL); } Verbose::out(1, "Commit transaction..."); db.commitTransaction(); Verbose::out(1, "select Test1.id, Test1.Name, Test2.Name from Test1, Test2 where Test1.Id = Test2.id"); rset.open("select Test1.id, Test1.Name, Test2.Name from Test1, Test2 where Test1.Id = Test2.id"); while (rset.fetch()) { int iID = rset.getInteger(0); std::string strName = rset.getString(1); std::string strName2 = rset.getString(2); Verbose::out(1, "\tfetched: " + ::getInt(iID) + ", " + strName + ", " + strName2); } rset.close(); Verbose::out(1, "drop table Test1"); db.execute("drop table Test1"); Verbose::out(1, "drop table Test2"); db.execute("drop table Test2"); Verbose::out(1, "Closing SQLite database"); db.close(); Verbose::out(1, "*"); } catch (SQLiteException& e) {db.rollbackTransaction(); 
db.close(); Verbose::out(1, e.getMessage());} // End sample usage code } /** * Constructs a SQLiteDatabase object. */ SQLiteDatabase::SQLiteDatabase() { m_pdb = NULL; m_pstmt = NULL; m_bOpen = false; } /** * Destroys a SQLiteDatabase object. * Closes the database if it is left open. */ SQLiteDatabase::~SQLiteDatabase() { close(); } /** * Error handling function. Throws a SQLiteException pointer. * The error code and Message will be combined into the SQLiteException message. * @param iReturnCode - The error code from SQLite. * @param strMessage - The error message. */ void SQLiteDatabase::error(int iReturnCode, const std::string& strMessage) { std::stringstream ss; std::string str; ss << iReturnCode; ss >> str; std::string strMsg; /* Note: the journal file is always created in the same directory as the database file. * Changing the temp directory has no affect on the journal file. */ if ( (iReturnCode == SQLITE_CANTOPEN ) && !Fs::isWriteableDir(Fs::dirname( m_dbName) ) ) { strMsg = "ERROR: sqlite3 update permission denied...journal file write failed because parent directory is not writeable: " + Fs::dirname(m_dbName); } else{ strMsg = "ERROR: SQLiteCode: " + str + ", Message: " + strMessage; } throw SQLiteException(strMsg); } /** * Opens a SQLite database. * @param strFileName - The name of the file to open. * @param bReadOnly - true if the file is to be opened read only, else false. (Defaults to false.) 
*/ void SQLiteDatabase::open(const std::string& strFileName, bool bReadOnly) { /* int iFlags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE; if (bReadOnly) {iFlags = SQLITE_OPEN_READONLY;} int iReturnCode = sqlite3_open_v2(strFileName.c_str(), &m_pdb, iFlags, NULL); if (iReturnCode != SQLITE_OK) {error(iReturnCode, "Failed to open SQLite file:\t" + strFileName);} */ m_dbName = strFileName; std::string tmp_unc_path=Fs::convertToUncPath(m_dbName); int iReturnCode = sqlite3_open(tmp_unc_path.c_str(), &m_pdb); if (iReturnCode != SQLITE_OK) {error(iReturnCode, "Failed to open SQLite file: "+FS_QUOTE_PATH(tmp_unc_path));} sqlite3_extended_result_codes(m_pdb, 1); m_bOpen = true; } /** * Closes a SQLite database. */ void SQLiteDatabase::close() { if (m_bOpen) { int iReturnCode = sqlite3_close(m_pdb); if (iReturnCode != SQLITE_OK) {error(iReturnCode, "Failed to close SQLite file.");} m_bOpen = false; m_dbName.clear(); } } /** * Executes a SQL statement. Does not return any data. * @param strSQL - The SQL statement to execute. * @param bError - Set to true to trap SQL statement errors, or false to ignore. * @param abortOnErr - true calls Err::abort on SQLite exception, else just throw the exception. */ void SQLiteDatabase::execute(const std::string& strSQL, bool bError, bool abortOnErr ) { try { if (m_pdb == NULL) { throw SQLiteException("ERROR: SQLite Database has not been opened."); } char* pszMsg = NULL; int iReturnCode = sqlite3_exec(m_pdb, strSQL.c_str(), NULL, NULL, &pszMsg); if (bError) { if (iReturnCode != SQLITE_OK) { std::string str = pszMsg; sqlite3_free(pszMsg); error(iReturnCode, str); } } else { if (iReturnCode != SQLITE_OK) { sqlite3_free(pszMsg); } } } catch(SQLiteException e) { if ( abortOnErr ) { Err::errAbort(std::string("SQLite Database exception: ") + e.getMessage()); } else { throw(e); } } } /** * Constructs a SQLiteRecordset object. * @param pdb - The pointer to the SQLiteDatabase. 
*/ SQLiteRecordset::SQLiteRecordset(SQLiteDatabase& db) { m_pdb = &db; m_bFirstRow = false; m_bEOF = true; m_bOpen = false; } /** * Destroys a SQLiteRecordset object. * Closes the recordset if left open. */ SQLiteRecordset::~SQLiteRecordset() { close(); } /** * Prepares a SQL statement. * @param strSQL - The SQL statement. */ void SQLiteRecordset::open(const std::string& strSQL) { const char* pszTail = NULL; int iReturnCode = sqlite3_prepare_v2(&(m_pdb->getConnection()), strSQL.c_str(), (int)strSQL.length(), &m_pstmt, &pszTail); if (iReturnCode != SQLITE_OK) {m_pdb->error(iReturnCode, "Failed to prepare SQL statement.");} m_bOpen = true; m_bFirstRow = true; m_bEOF = false; } /** * Closes the SQLiterecordset. */ void SQLiteRecordset::close() { if (m_bOpen) { //int iReturnCode = sqlite3_finalize(m_pstmt); // if (iReturnCode != SQLITE_OK) {m_pdb->error(iReturnCode, "Failed to finalize SQL statement.");} } m_bOpen = false; m_bFirstRow = false; } /** * Steps through the result set from a prepared SQL statement. * @return - bool true if more rows to fetch, or false if the end of the recordset has been reached. */ bool SQLiteRecordset::fetch() { int iReturnCode = sqlite3_step(m_pstmt); if (iReturnCode != SQLITE_ROW) {m_bEOF = true;} return !m_bEOF; } /** * Get a String value from the recordset. * @param iColumnIndex - The zero based index of the column. * @return - String value retrieved from column. */ std::string SQLiteRecordset::getString(int iColumnIndex) { AffxString str; char* p = (char*)sqlite3_column_text(m_pstmt, iColumnIndex); if (p != NULL) {str = p;} return str; } /** * Get an integer value from the recordset. * @param iColumnIndex - The zero based index of the column. * @return - integer value retrieved from column. */ int SQLiteRecordset::getInteger(int iColumnIndex) { return sqlite3_column_int(m_pstmt, iColumnIndex); } /** * Get a Double value from the recordset. * @param iColumnIndex - The zero based index of the column. 
* @return - Double value retrieved from column. */ double SQLiteRecordset::getDouble(int iColumnIndex) { if (sqlite3_column_type(m_pstmt, iColumnIndex) == SQLITE_NULL) {return std::numeric_limits::quiet_NaN();} return sqlite3_column_double(m_pstmt, iColumnIndex); } affxparser/src/fusion/util/SQLite.h0000644000175200017520000000577414516003651020334 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SQLite.h * @author Walt Short * @date Fri Sept 28 15:24:08 2007 * * @brief Wrapper for SQLite. * */ #ifndef _SQLITE_H_ #define _SQLITE_H_ #include "../../../external/sqlite/sqlite3.h" // #include // class SQLiteDatabase; /** * SQLiteException - Exception class for SQLite. */ class SQLiteException { public: std::string m_strMessage; /** * Constructs a SQLiteException object. * @param strMessage - The error message. */ SQLiteException(const std::string& str) {m_strMessage = str;} /** * Returns the error message. * @return - The error message string. */ std::string getMessage() {return m_strMessage;} }; /** * SQLiteRecordset - Class for handling query result sets. 
*/ class SQLiteRecordset { protected: SQLiteDatabase* m_pdb; sqlite3_stmt* m_pstmt; bool m_bFirstRow; bool m_bEOF; bool m_bOpen; public: SQLiteRecordset(SQLiteDatabase& db); virtual ~SQLiteRecordset(); void open(const std::string& strSQL); void close(); bool fetch(); std::string getString(int iColumnIndex); int getInteger(int iColumnIndex); double getDouble(int iColumnIndex); }; /** * SQLiteDatabase - Class for handling SQLite databases. */ class SQLiteDatabase { public: SQLiteDatabase(); virtual ~SQLiteDatabase(); protected: std::string m_dbName; sqlite3* m_pdb; sqlite3_stmt* m_pstmt; bool m_bOpen; public: static void test(); /** * Returns the sqlite3 reference. * @return - The sqlite3 reference. */ sqlite3& getConnection() {return *m_pdb;} void error(int iReturnCode, const std::string& strMessage); void open(const std::string& strFileName, bool bReadOnly = false); void close(); void execute(const std::string& strSQL, bool bError = true, bool abortOnErr = true); SQLiteRecordset* newRecordset() {return new SQLiteRecordset(*this);} void deleteRecordset(SQLiteRecordset** pprset) {delete *pprset; *pprset = NULL;} /** * Begin a transaction. */ void beginTransaction() {execute("begin");} /** * Commit a transaction. */ void commitTransaction() {execute("commit");} /** * Rollback a transaction. */ void rollbackTransaction() {if (m_bOpen) {execute("rollback", false);}} }; #endif affxparser/src/fusion/util/SocketBase.cpp0000644000175200017520000001352614516003651021543 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. 
// // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef WIN32 #include #include #else #include #include #include #include #include #include #endif /* WIN32 */ #include "util/SocketBase.h" #include "util/Err.h" #include "util/Convert.h" using namespace std; #define HEADER_SIZE 20 // 5 * 4 byte integers void SocketBase::parseMsgFromBuffer(std::vector &buffer, std::vector *messages) { int expected = -1; int channel = -1; int type = -1; int protocol = -1; int reserved = -1; while (buffer.size() >= HEADER_SIZE) { // header has at least HEADER_SIZE bytes... 
const char *cBuf = &m_Buffer[0]; unPackInt32NetOrder(cBuf, expected); unPackInt32NetOrder(cBuf+4, type); unPackInt32NetOrder(cBuf+8, channel); unPackInt32NetOrder(cBuf+12, protocol); APT_ERR_ASSERT(protocol == SOCKET_PROTOCOL_VER, "Expecting protocol version: " + ToStr(SOCKET_PROTOCOL_VER) + " but got: " + ToStr(protocol)); unPackInt32NetOrder(cBuf+16, reserved); if (buffer.size() >= expected+HEADER_SIZE) { string current(expected, 0); for (int i = 0; i < expected; i++) { current[i] = buffer[i + HEADER_SIZE]; } if (messages != NULL) { messages->push_back(current); } else { for (int i = 0; i < m_Handlers.size(); i++) { if (m_Handlers[i]->handlesType(type) && m_Handlers[i]->handlesChannel(channel)) { m_Handlers[i]->handleMessage(type, channel, current); } } } } else { break; // incomplete message, leave it for later } buffer.erase(buffer.begin(), buffer.begin() + expected + HEADER_SIZE); } } bool SocketBase::fillBufferFromSocket(int &conn) { bool connected = true; fd_set readFds; FD_ZERO(&readFds); FD_SET(conn, &readFds); struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 0; char buf[2048]; int nBytes = 0; if (select(conn + 1, &readFds, NULL, NULL, &tv) == -1) { APT_ERR_ABORT("select() failed."); } if (FD_ISSET(conn, &readFds)) { // we got one!! memset(buf, 0, 2048); if ((nBytes = recv(conn, buf, sizeof(buf), 0)) <= 0) { // got error or connection closed by client if (nBytes == 0) { // connection closed Verbose::out(3, "SocketBase::checkForMsgss() - socket " + ToStr(conn) + " hung up"); CLOSE_SOCKET(conn); // bye! 
conn = -1; connected = false; } else { APT_ERR_ABORT("checkForMsgs() error."); } } else { for (int i = 0; i < nBytes; i++) { m_Buffer.push_back(buf[i]); } } } return connected; } bool SocketBase::checkForMsgs(std::vector &messages, int &socket) { bool connected = fillBufferFromSocket(socket); messages.clear(); parseMsgFromBuffer(m_Buffer, &messages); return connected; } bool SocketBase::checkForMsgs(int &socket) { bool connected = fillBufferFromSocket(socket); parseMsgFromBuffer(m_Buffer, NULL); return connected; } int SocketBase::sendAll(int socket, const char *buf, int toSend, int &sent) { int total = 0; // how many bytes we've sent int bytesLeft = toSend; // how many we have left to send int curSend = 0; while(total < toSend) { curSend = send(socket, buf+total, bytesLeft, 0); if (curSend == -1) { break; } total += curSend; bytesLeft -= curSend; } sent = total; // return number actually sent here int rv = -1; if (curSend != 0) rv = 0; return rv; } void SocketBase::unPackInt32NetOrder(const char *buf, int32_t &x) { int32_t z = (buf[0]<<24) | (buf[1]<<16) | (buf[2]<<8) | buf[3]; x = ntohl(z); } void SocketBase::packInt32NetOrder(char *buf, int32_t x) { int32_t z = htonl(x); *buf++ = z>>24; *buf++ = z>>16; *buf++ = z>>8; *buf++ = z; } void SocketBase::addHandler(SocketHandler *handler) { m_Handlers.push_back(handler); } void SocketBase::sendMsg(const std::string &msg, int type, int channel, int &conn) { fd_set readFds; fd_set writeFds; FD_ZERO(&readFds); FD_SET(conn, &readFds); writeFds = readFds; struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 0; if (select(conn + 1, &readFds, &writeFds, NULL, &tv) == -1) { APT_ERR_ABORT("select() failed."); } if (FD_ISSET(conn, &writeFds)) { // we got one!! 
int bufSize = msg.size() + HEADER_SIZE; // 12 bytes in header char *buf = new char[bufSize]; memset(buf, 0, bufSize); int32_t size = msg.size(); packInt32NetOrder(buf, size); packInt32NetOrder(buf+4, type); packInt32NetOrder(buf+8, channel); packInt32NetOrder(buf+12, SOCKET_PROTOCOL_VER); packInt32NetOrder(buf+16, 0); for (int i = 0; i < size; i++) { buf[i+HEADER_SIZE] = msg.at(i); } int sent = 0; if (sendAll(conn, buf, bufSize, sent) == -1) { APT_ERR_ABORT("Error sending data to fd: " + ToStr(conn)); } if (sent != bufSize) { APT_ERR_ABORT("Error expecting to send: " + ToStr(bufSize) + " but only got: " + ToStr(sent)); } FreezArray(buf); } } affxparser/src/fusion/util/SocketBase.h0000644000175200017520000001235614516003651021210 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SocketBase.h * @author Chuck Sugnet * @date Mon Jan 25 11:29:11 2010 * * @brief Some utilities for dealing with reading and writing to sockets. */ #ifndef _UTIL_SOCKETBASE_H #define _UTIL_SOCKETBASE_H // #include "portability/affy-base-types.h" #include "util/SocketHandler.h" // #include #include #ifdef _WIN32 // Windows includes are different for sockets than unix. 
#include //QZ: windows.h will load winsock.h automatically. It seems we do not use any winsock2.h specific functions. Ellen has tested out the changes. #else #include #endif #ifdef _WIN32 #define CLOSE_SOCKET(_socket) { shutdown(_socket,SD_BOTH); closesocket(_socket); } #else #define CLOSE_SOCKET(_socket) shutdown(_socket,SHUT_RDWR) #endif /** * Class with static utility functions for working with Sockets. * * The messages passsed back and forth have a standard message header which consists of * three 4 byte integers consisting of the message size, message type and message channel * This information is used to know when the full message has been recieved and what to do * with the resulting following the header. The type of the message would be something like the * format and the channel would be which handler would be interested. So for exampe a basic type * would be "text" and a basic channel would be "stdout", or progress. This is analagous to the * type being the format of the signal coming in (eg high definition or regular tv) and the * channel being the address of where to send the data (eg hbo television channel). * * Msg format: * @todo - supported types, versions, etc.?
 int32_t|int32_t|int32_t|int32_t|int32_t | bytes
  size  | type  |channel|version|reserved| msg bytes
 
* * */ class SocketBase { public: /** * Send a message over multiple socket send calls as necessary * * @param socket - Socket to write to * @param buf - Buffer to send over socket * @param toSend - How long is buffer * @param sent - Number characters actually sent * * @return */ virtual int sendAll(int socket, const char *buf, int toSend, int &sent); /** * Tokenize the buffer into distinct messages * * @param buffer - String to break into individual messages * @param messages - Vector of messages to fill in */ virtual void parseMsgFromBuffer(std::vector &buffer, std::vector *messages); /** * Try to read new messages from socket. * * @param messages - Vector to fill with char * messages from socket. * @param socket - socket to read messages from. Set to -1 if disconnected. */ virtual bool checkForMsgs(std::vector &messages, int &socket); /** * Try to read new messages from socket. * * @param socket - socket to read messages from. Set to -1 if disconnected. */ virtual bool checkForMsgs(int &socket); /** * Write char * messages into a socket. * * @param msg - Message to write to the socket. * @param type - What format of message? e.g. SocketHandler::TEXT * @param channel - What channel or sort of message? e.g. SocketHandler::VERBOSE * @param socket - Socket that could be written to. */ void sendMsg(const std::string &msg, int type, int channel, int &conn); /** * Add a handler for incoming messages. This handler will have a chance * see the message type and channel and decide if it should do something. * Like a java listener or a regular callback * * @param - * */ virtual void addHandler(SocketHandler *handler); /** * Take a 4 byte 32bit int from buff and parse it into x. * * @param buf - Buffer with at least 4 bytes in it. * @param x - Integer to be filled in. */ static void unPackInt32NetOrder(const char *buf, int32_t &x); /** * Pack int x into first 4 bytes pointed at by buf. 
* * @param buf - Buffer containing at least 4 bytes to write int into * @param x - integer to be written. */ static void packInt32NetOrder(char *buf, int32_t x); /** * Fill in our buffer from the socket connection * * @param conn - Socket fd desc * * @return */ bool fillBufferFromSocket(int &conn); protected: /// Data coming over socket, including any partial messages std::vector m_Buffer; std::vector m_Handlers; #ifdef WIN32 WSADATA m_WsaData; ///< Windows data socket structure #endif }; #endif /* SOCKETBASE_H */ affxparser/src/fusion/util/SocketClient.cpp0000644000175200017520000000656214516003651022111 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef WIN32 /* Windows includes are different for sockets than unix. 
*/ #include #else #include #include #include #include #include #include #endif /* WIN32 */ #include "util/SocketBase.h" #include "util/SocketServer.h" #include "util/SocketClient.h" #include "util/Err.h" #include "util/Verbose.h" using namespace std; SocketClient::SocketClient() { m_Socket = -1; #ifdef WIN32 Err::configureErrHandler(true, true); Err::setNewLineOnError(true); Err::setThrowStatus(false); // Err::setExitOnError(true); Err::setExitOnErrorValue(0); //// if (WSAStartup(MAKEWORD(2,0), &m_WsaData) != 0) { APT_ERR_ABORT("WSAStartup failed.\n"); } #endif } SocketClient::~SocketClient() { cleanUp(); } void SocketClient::cleanUp() { if (m_Socket >= 0) { // Verbose::out(1, "Cleaning up client socket."); CLOSE_SOCKET(m_Socket); } #ifdef WIN32 WSACleanup(); #endif } void SocketClient::socketConnect(const std::string &host, const std::string &port) { struct addrinfo hints, *servinfo, *p; int rv; m_Host = host; m_Port = port; memset(&hints, 0, sizeof hints); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; if ((rv = getaddrinfo(host.c_str(), port.c_str(), &hints, &servinfo)) != 0) { APT_ERR_ABORT("getaddrinfo: " + ToStr(gai_strerror(rv))); } // loop through all the results and connect to the first we can for(p = servinfo; p != NULL; p = p->ai_next) { if ((m_Socket = socket(p->ai_family, p->ai_socktype, p->ai_protocol)) == -1) { continue; } if (connect(m_Socket, p->ai_addr, p->ai_addrlen) == -1) { CLOSE_SOCKET(m_Socket); continue; } break; } freeaddrinfo(servinfo); if (p == NULL) { APT_ERR_ABORT("Failed to connect - host: " + host + " port: " + port); } #ifdef _WIN32 int flag=1; rv=setsockopt(m_Socket,IPPROTO_TCP,TCP_NODELAY,(char*)&flag,sizeof(int)); APT_ERR_ASSERT(rv==0, "return from setsockopt"); #endif Verbose::out(3, "SocketClient::socketConnect() - connected"); } void SocketClient::checkForMsgs(std::vector &messages) { if(m_Socket < 0) { APT_ERR_ABORT("Error: Must call socketConnect() before checkForMsgs()."); } SocketBase::checkForMsgs(messages, 
m_Socket); } void SocketClient::sendMsg(const std::string &msg, int type, int channel) { if(m_Socket < 0) { APT_ERR_ABORT("Error: Must call socketConnect() before sendMsg()."); } SocketBase::sendMsg(msg, type, channel, m_Socket); } affxparser/src/fusion/util/SocketClient.h0000644000175200017520000000452514516003651021553 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SocketClient.h * @author Chuck Sugnet * @date Mon Jan 25 11:07:19 2010 * * @brief Basic version of a client that will open a socket to a server and * read/write messages from that socket. * */ #ifndef SOCKETCLIENT_H #define SOCKETCLIENT_H #include #include "util/SocketBase.h" /** * Class for reading and writing to a server over a socket. */ class SocketClient : public SocketBase { public: /// Constructor SocketClient(); /// Destructor closes sockets ~SocketClient(); void cleanUp(); /** * Open a connection to a listening server. * * @param host - ip address or name of computer to connect to, often just 127.0.0.1 * @param port - which port to try and connect on. 
*/ void socketConnect(const std::string &host, const std::string &port); /** * Try reading from the socket to see if there is anything sent. * * @param messages - data from the socket as delivered by recv() */ void checkForMsgs(std::vector &messages); /** * Write string to open socket. * * @param msg - Message to write into the socket */ void sendMsg(const std::string &msg, int type, int channel); private: int m_Socket; ///< listening socket descriptor std::string m_Host; ///< Host connected to std::string m_Port; ///< Port that client is connected to std::vector m_Sockets; ///< List of open sockets/file descriptors currently connected to, sorted by file descriptor }; #endif /* SOCKETCLIENT_H */ affxparser/src/fusion/util/SocketEngine.cpp0000644000175200017520000001774514516003651022105 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef _MSC_VER #define _CRT_SECURE_NO_WARNINGS #endif // #include #include #include #include #include #include #ifdef WIN32 #include #include #else #include #include #include #include #include #include #include #endif /* WIN32 */ // #include "util/SocketEngine.h" // #include "portability/affy-system-api.h" #include "util/MsgSocketHandler.h" #include "util/SocketTextHandler.h" #include "util/Verbose.h" using namespace std; SocketEngine::SocketEngine(const std::string &program, const std::string &jobFile, const std::string &host, unsigned int port) : m_Program(program), m_Host(host), m_Port(port) { m_PgOpts.push_back(m_Program); m_PgOpts.push_back("--xml-file"); m_PgOpts.push_back(jobFile); initVars(); } SocketEngine::SocketEngine(const std::string &program, const std::vector &args, const std::string &host, unsigned int port) : m_Program(program), m_Host(host), m_Port(port) { m_PgOpts.resize(args.size() + 1); m_PgOpts[0] = m_Program; copy(args.begin(), args.end(), m_PgOpts.begin() + 1); initVars(); } SocketEngine::~SocketEngine() { } bool SocketEngine::run() { // Our handler that will fire the appropriate verbose and progress calls locally SocketTextHandler handler; server.addHandler(&handler); unsigned int portUsed = server.socketOpen(m_Host, m_Port); if (portUsed == 0) { Verbose::out(1,"Unable to establish a socket connection. 
This run will abort"); return false; } m_Port = portUsed; string port = ToStr(m_Port); char host_port_buf[100]; snprintf(host_port_buf,sizeof(host_port_buf), "%s:%s", m_Host.c_str(),port.c_str()); m_PgOpts.push_back("--console-off"); m_PgOpts.push_back("true"); m_PgOpts.push_back("--use-socket"); m_PgOpts.push_back(host_port_buf); #ifdef _WIN32 doWindows(server, handler, m_Host, port, m_PgOpts); #else doUnix(server, handler, m_Host, port, m_PgOpts); #endif return handler.successfulRun(); } int SocketEngine::doWindows(SocketServer &server, SocketTextHandler &handler, string &host, string &port, vector commandArgs) { #ifdef _WIN32 string command; for (size_t i = 0; i < commandArgs.size(); i++) { command = command + "\"" + commandArgs[i] + "\" "; } Verbose::out(1,"Command is: " + command); ws = new TCHAR[command.size()+1]; try { int i=0; for( std::string::const_iterator it = command.begin(); it != command.end(); it++ ) { // Verbose::out(1, "Doing character: " + ToStr(i) + " of " + ToStr(command.length())); ws[i++] = use_facet< ctype >(cout.getloc()).widen( *it); } ws[command.size() - 1 ] = NULL; Verbose::out(1, "Done doing ws"); if(!CreateProcess( m_lpApplicationName, ws, m_lpProcessAttributes, m_lpThreadAttributes, m_bInheritHandles, m_dwCreationFlags, m_lpEnvironment, m_lpCurrentDirectory, &m_StartInfo, &m_ProcInfo)) { Err::errAbort("Could not run the program."); } Verbose::out(1,"Waiting for connections."); while(1) { std::vector messages; bool connected = false; bool beenConnected = false; int numFound = server.acceptNewConnection(); if (numFound > 0) { Verbose::out(1, "Found connections."); } DWORD exitVal; bool rv = GetExitCodeProcess(m_ProcInfo.hProcess, &exitVal); if (exitVal != STILL_ACTIVE) { if (exitVal == 0) { break; } else { Err::errAbort("Process failed with code: " + ToStr(rv)); } } if (server.isConnected()) { beenConnected = true; connected = true; bool stillConnected = server.processNewMessages(); if (!stillConnected) { Verbose::out(1,"Lost 
connection..."); break; } } if (!server.isConnected()) { Verbose::out(1,".", false); } Sleep(500); } } catch (...) { TerminateProcess(m_ProcInfo.hProcess,0); delete []ws; server.cleanUp(); throw; } delete []ws; #endif return 0; } void SocketEngine::TerminateLaunchedApp() { #ifdef _WIN32 try { DWORD exitVal; int counter = 0; delete []ws; bool rv = GetExitCodeProcess(m_ProcInfo.hProcess, &exitVal); if (exitVal == STILL_ACTIVE) TerminateProcess(m_ProcInfo.hProcess,0); } catch(...) { } #endif server.cleanUp(); } int SocketEngine::doUnix(SocketServer &server, SocketTextHandler &handler, string &host, string &port, vector commandArgs) { #ifndef _WIN32 int pid = fork(); cout << "host " << host << " port " << port; if(pid > 0) { try { //server.socketOpen(host, port); // sleep(2); std::vector messages; bool connected = false; bool beenConnected = false; while(1) { int numFound = server.acceptNewConnection(); if (numFound > 0) { cout << "Found " << numFound << " new connections." << endl; } if (server.isConnected()) { beenConnected = true; connected = true; bool stillConnected = server.processNewMessages(); if (!stillConnected) { cout << "Lost connection..." << endl; break; } } if (!server.isConnected()) { cout << "."; cout.flush(); } struct timespec tWait, tRet; tWait.tv_sec = 0; tWait.tv_nsec = 100000000; // nanosleep(1); nanosleep(&tWait, &tRet); } } catch (...) { cout << "Caught exception. Killing child process: " << pid << endl; int rv = kill(pid, 15); // if termination signal doesn't work then hard kill... if (rv != 0) { rv = kill(pid, 9); } } } else if (pid == 0) { vector execvp_args; // @todo this should be some sort of util function. for (int i=0;i #include #include "util/SocketServer.h" class SocketTextHandler; /** @brief SocketEngine design notes: The SocketEngine utilizes a simple message passing protocol over a socket to run an APT engine cleass in a separate process. 
The idea is that it is cleaner and easier to have GUI programs like GTC
run APT programs in a separate engine rather than try to run them via COM
objects or managed from C# code. The GUI programs traditionally tie into the
global Verbose object's reporting system. For time reasons we didn't want to
rewrite all of that code so the SocketEngine will handle running the other
process remotely and translate the messages that come over the socket to the
local Verbose object. This way all of the existing code that ties into the
local Verbose object will work unchanged.

There are two phases to the socket engine. The first is the connection of the
foreign process to the local SocketServer object.

@image html SocketCommunicationConnect.png "Phase 1: Delegate process connects to the SocketServer in supervisor process."

The second phase is when the delegate process passes messages back from the
delegate process Verbose calls to the supervising process and they are
translated to calls in the local Verbose object.

@image html SocketCommunicationMsg.png "Phase 2: Delegate process passes messages to the supervisor process which fires the usual calls in Verbose."
*/ class SocketEngine { public: SocketEngine(const std::string &program, const std::string &jobFile, const std::string &host="127.0.0.1", unsigned int port=7889); SocketEngine(const std::string &program, const std::vector &args, const std::string &host="127.0.0.1", unsigned int port=7889); ~SocketEngine(); SocketServer server; #ifdef WIN32 PROCESS_INFORMATION* getProcessInformation() {return &m_ProcInfo;} STARTUPINFO* getStartupInfo() {return &m_StartInfo;} void setApplicationName(LPTSTR lpApplicationName) { m_lpApplicationName = lpApplicationName; } void setProcessAttributes(LPSECURITY_ATTRIBUTES lpProcessAttributes) { m_lpProcessAttributes = lpProcessAttributes; } void setThreadAttributes(LPSECURITY_ATTRIBUTES lpThreadAttributes) { m_lpThreadAttributes = lpThreadAttributes; } void setInheritHandles(BOOL bInheritHandles) { m_bInheritHandles = bInheritHandles; } void setCreationFlags(DWORD dwCreationFlags) { m_dwCreationFlags = dwCreationFlags; } void setEnvironment(LPVOID lpEnvironment) { m_lpEnvironment = lpEnvironment; } void setCurrentDir(LPTSTR lpCurrentDirectory) { m_lpCurrentDirectory = lpCurrentDirectory; } #endif bool run(); void TerminateLaunchedApp(); private: int doWindows(SocketServer &server, SocketTextHandler &handler, std::string &host, std::string &port, std::vector commandArgs); int doUnix(SocketServer &server, SocketTextHandler &handler, std::string &host, std::string &port, std::vector commandArgs); std::string m_Program; std::vector m_PgOpts; std::string m_Host; unsigned int m_Port; #ifdef WIN32 PROCESS_INFORMATION m_ProcInfo; STARTUPINFO m_StartInfo; LPTSTR m_lpApplicationName; LPSECURITY_ATTRIBUTES m_lpProcessAttributes; LPSECURITY_ATTRIBUTES m_lpThreadAttributes; BOOL m_bInheritHandles; DWORD m_dwCreationFlags; LPVOID m_lpEnvironment; LPTSTR m_lpCurrentDirectory; TCHAR *ws; void initVars(); #else void initVars() {}; // no-op on windows #endif /* WIN32 */ }; #endif /* SOCKETENGINE_H */ 
affxparser/src/fusion/util/SocketHandler.cpp0000644000175200017520000000525114516003651022242 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "util/SocketHandler.h" using namespace std; bool SocketHandler::evenPrecedingSlash(const string &s, int pos) { int count = 0; while (--pos >= 0) { if (s[pos] == '\\') { count++; } else { break; } } return count % 2 == 0; } bool SocketHandler::isEscaped(const string &s, char c, int pos) { APT_ERR_ASSERT(s[pos] == c, "Char: " + ToStr(c) + " not found at Pos: " + ToStr(pos) + " in string: " + s); if (pos == 0) { return false; } if (pos == 1 && s[0] != '\\') { return false; } if (pos > 1 && ((s[pos - 1] != '\\') || evenPrecedingSlash(s, pos))) { return false; } return true; } int SocketHandler::findNextNonEscape(const string &s, char c, int start) { for (size_t i = start; i < s.size(); i++) { if (s[i] == c && !isEscaped(s, c, i)) { return i; } } return -1; } string SocketHandler::serializeMap(map &pairs) { map::const_iterator iter; string buff; for (iter = pairs.begin(); iter != pairs.end(); iter++) { APT_ERR_ASSERT(iter->first.find('=') == string::npos, "Can't have '=' delimiter in keys."); 
buff.append(iter->first); buff.append("="); buff.append(Util::escapeString(iter->second, ',')); buff.append(","); } return buff; } map SocketHandler::deserializeMap(const string &s) { map pairs; int pos = s.find('='); int currentPos = 0; while (pos != string::npos) { string key = s.substr(currentPos, pos-currentPos); int end = findNextNonEscape(s, ',', pos+1); APT_ERR_ASSERT(end >=0 , "Didn't find ',' delimiter in string: " + s.substr(pos, s.size()-pos) + " for: " + s); string val = s.substr(pos+1, (end-pos)-1); val = Util::deEscapeString(val); pairs[key] = val; currentPos = end+1; pos = s.find('=', currentPos); } return pairs; } affxparser/src/fusion/util/SocketHandler.h0000644000175200017520000000703214516003651021706 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef SOCKETHANDLER_H #define SOCKETHANDLER_H #include #include #include #include "util/Convert.h" #include "util/Err.h" #include "util/Util.h" #define SOCKET_PROTOCOL_VER 1 /** * A listener class that will be called by SocketBase to parse and utilize * messages over a socket. 
*/ class SocketHandler { public: /// What type/format of message coming across enum MsgType { UNKNOWN_TYPE, TEXT, LAST_TYPE }; /// What channel of message coming across (like stdout,stderr, etc.) enum MsgChannel { UNKNOWN_CHANNEL, VERBOSE, PROGRESS, KILLMSG, COMPLETION, LAST_CHANNEL }; /** * Determine if this handler should be called on messages from this channel * @param channel - What channel (e.g. SocketHandler::VERBOSE) * @return true if this handler is interested in messages from this channel */ virtual bool handlesChannel(int channel) { return m_Channels.find(channel) != m_Channels.end(); } /** * Determine if this handler should be called on messages from this type * @param type - What type (e.g. SocketHandler::TEXT) * @return true if this handler is interested in messages from this type */ virtual bool handlesType(int type) { return m_Types.find(type) != m_Types.end(); } /** * Take a message and do whatever functionality this handler * implements from simply displaying message to * * @param type - What type (i.e. text, xml, ) of message is this message. * @param channel - What channel (i.e. verbose, progress, etc.) is this message. * @param msg - Data to be parsed out */ virtual void handleMessage(int type, int channel, const std::string &msg) = 0; /** * Turn a map into a serialized string like "key1=val1,key2=val2," where * value strings are escaped to protect the ',' characters. */ static std::string serializeMap(std::map &pairs); /** * Turn a serialized map back into a full map by parsing out the key=val pairs */ static std::map deserializeMap(const std::string &s); protected: /** * Is there an even number of escape slashes preceeding position? */ static bool evenPrecedingSlash(const std::string &s, int pos); /** * Is this character at position pos escaped? */ static bool isEscaped(const std::string &s, char c, int pos); /** * Find the next occurance of c after start that is not escaped. 
*/ static int findNextNonEscape(const std::string &s, char c, int start); std::set m_Channels; /// Collection of channels that a handler is interested in std::set m_Types; /// Collection of types that a handler is interested in }; #endif /* SOCKETHANDLER_H */ affxparser/src/fusion/util/SocketServer.cpp0000644000175200017520000001223014516003651022126 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef WIN32 #include #else #include #include #include #include #include #include #endif /* WIN32 */ #include "util/Err.h" #include "util/SocketServer.h" #include "util/SocketBase.h" using namespace std; SocketServer::SocketServer() { m_ListenSocket = -1; m_RWSocket = -1; #ifdef WIN32 if (WSAStartup(MAKEWORD(2,0), &m_WsaData) != 0) { Err::errAbort("Unable to initialize socket communication."); } #endif } SocketServer::~SocketServer() { cleanUp(); } void SocketServer::cleanUp() { if (m_ListenSocket >= 0) { CLOSE_SOCKET(m_ListenSocket); } if (m_RWSocket >= 0) { CLOSE_SOCKET(m_RWSocket); } #ifdef WIN32 WSACleanup(); #endif } unsigned int SocketServer::socketOpen(const std::string &host, unsigned int &startPort) { unsigned int lastPort = startPort + 5; while 
(startPort <= lastPort) { try { if (socketOpen(host, ToStr(startPort)) == true) return (startPort); startPort++; } catch(...) { startPort++; } } return 0; } bool SocketServer::socketOpen(const std::string &host, const std::string &port) { int rv = 0; struct addrinfo hints, *ai = NULL, *p = NULL; // get us a socket and bind it memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_PASSIVE; if ((rv = getaddrinfo(host.c_str(), port.c_str(), &hints, &ai)) != 0) { return false; //APT_ERR_ABORT("getaddrinfo() error: " + ToStr(gai_strerror(rv))); } for(p = ai; p != NULL; p = p->ai_next) { m_ListenSocket = socket(p->ai_family, p->ai_socktype, p->ai_protocol); if (m_ListenSocket < 0) { continue; } // Dont think we actually want to do this, as it may prevent // a relaunch of this app to reuse the socket. // http://msdn.microsoft.com/en-us/library/ms740621(VS.85).aspx // // #ifdef _WIN32 // char yes[256]; // setsockopt(m_ListenSocket, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, yes, sizeof(yes)); // #endif if (bind(m_ListenSocket, p->ai_addr, p->ai_addrlen) < 0) { CLOSE_SOCKET(m_ListenSocket); continue; } break; } freeaddrinfo(ai); // all done with this // if we got here, it means we didn't get bound if (p == NULL) { return false; //APT_ERR_ABORT("Failed to bind host " + host + " port: " + port); } // listen if (listen(m_ListenSocket, 10) == -1) { return false; //APT_ERR_ABORT("Listening failed."); } return true; } bool SocketServer::isConnected() { return m_RWSocket >= 0; } bool SocketServer::checkForMsgs(std::vector &messages) { if(m_RWSocket < 0) { APT_ERR_ABORT("Error: Must call socketOpen() before checking for messages."); } return SocketBase::checkForMsgs(messages, m_RWSocket); } bool SocketServer::processNewMessages() { return SocketBase::checkForMsgs(m_RWSocket); } void SocketServer::sendMsg(const std::string &msg, int type, int channel) { if(m_RWSocket < 0) { APT_ERR_ABORT("Error: Must call socketOpen() before sending 
messages."); } SocketBase::sendMsg(msg, type, channel, m_RWSocket); } bool SocketServer::acceptNewConnection() { if(m_ListenSocket < 0) { APT_ERR_ABORT("Error: Must call socketOpen() before checking for connections."); } fd_set m_ReadFds; FD_ZERO(&m_ReadFds); FD_SET(m_ListenSocket, &m_ReadFds); struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 0; if (select(m_ListenSocket + 1, &m_ReadFds, NULL, NULL, &tv) == -1) { APT_ERR_ABORT("select"); } struct sockaddr_storage remoteaddr; // client address int newfd = -1; socklen_t addrlen; vector toAdd; if (FD_ISSET(m_ListenSocket, &m_ReadFds)) { // we got one!! // handle new connections addrlen = sizeof(remoteaddr); newfd = accept(m_ListenSocket, (struct sockaddr *)&remoteaddr, &addrlen); if (newfd == -1) { APT_ERR_ABORT("accept"); } else { m_RWSocket = newfd; Verbose::out(2, "SocketServer: new connection found:" + ToStr(m_RWSocket) ); return true; } } return false; } // get sockaddr, IPv4 or IPv6: void *SocketServer::getInAddr(struct sockaddr *sa) { if (sa->sa_family == AF_INET) { return &(((struct sockaddr_in*)sa)->sin_addr); } return &(((struct sockaddr_in6*)sa)->sin6_addr); } affxparser/src/fusion/util/SocketServer.h0000644000175200017520000000705614516003651021605 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file SocketServer.h * @author Chuck Sugnet * @date Mon Jan 25 11:17:58 2010 * * @brief Basic version of a server that will listen for connections on a dedicated * socket and then listen on those connections for new messages from clients. * */ #pragma once #ifndef SOCKETSERVER_H #define SOCKETSERVER_H #include #include #include "util/SocketBase.h" /** * Basic version of a server that will listen for connections on a dedicated * socket and then listen on those connections for new messages from clients. */ class SocketServer : public SocketBase { public: /// Constructor SocketServer(); /// Destructor, clean up open sockets. ~SocketServer(); /** * Open a connection to a listening server with a range of possible ports to try. * * @param host - ip address or name of computer to open connection on, often just 127.0.0.1 * @param port - which port number to start the open connection request with. * * This is current coded to allow for 5 trys. TBD: is this a good number of ports * Should the caller control the number of trys. */ unsigned int socketOpen(const std::string &host, unsigned int &startPort); /** * Open a connection to a listening server. * * @param host - ip address or name of computer to open connection on, often just 127.0.0.1 * @param port - which port to open. */ bool socketOpen(const std::string &host, const std::string &port); /** * Try reading from the socket to see if there is anything sent. * * @param messages - data from the socket as delivered by recv() */ bool checkForMsgs(std::vector &messages); /** * Try reading from the socket to see if there is anything sent. */ bool processNewMessages(); /** * Write string to open socket. 
* * @param msg - Message to write into the socket */ void sendMsg(const std::string &msg, int type, int channel); /** * See if any clients have connected to server. * * @return - Number of clients connected in this function */ bool acceptNewConnection(); /** * Wrapper to get IPv4 or IPv6 as appropriate * * @param sa - Pointer to socket address structure * * @return - Pointer to correct socket address */ static void *getInAddr(struct sockaddr *sa); /** * Check to see if the server has any connections yet. * * @return */ bool isConnected(); void cleanUp(); private: int m_ListenSocket; ///< listening socket descriptor for new connection int m_RWSocket; ///< Client socket for reading and writing std::string m_Port; ///< Port open for new connections. ///< Address information for socket connections }; #endif /* SOCKETSERVER_H */ affxparser/src/fusion/util/SocketTextHandler.cpp0000644000175200017520000000500514516003651023104 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2011 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "util/SocketTextHandler.h" #include "util/Convert.h" #include "util/Err.h" #include "util/Verbose.h" SocketTextHandler::SocketTextHandler() { m_Channels.insert(VERBOSE); m_Channels.insert(PROGRESS); m_Channels.insert(KILLMSG); m_Types.insert(TEXT); m_Finished = false; m_SuccessfulRun = true; } bool SocketTextHandler::hasFinished() { return m_Finished; } bool SocketTextHandler::successfulRun() { return m_SuccessfulRun; } void SocketTextHandler::handleMessage(int type, int channel, const std::string &msg) { APT_ERR_ASSERT(m_Types.find(type) != m_Types.end(), "Don't support type: " + ToStr(type)); APT_ERR_ASSERT(m_Channels.find(channel) != m_Channels.end(), "Don't support channel: " + ToStr(channel)); std::map param = SocketHandler::deserializeMap(msg); int verbosity = Convert::toInt(param["VERBOSITY"]); if (channel == VERBOSE) { Verbose::out(verbosity, param["MSG"], false); } else if (channel == COMPLETION) { Verbose::out(verbosity, "Sucessful Completion", true); m_Finished = true; } else if (channel == PROGRESS) { std::string value = param["VALUE"]; if (value == "BEGIN") { int total = Convert::toInt(param["TOTAL"]); Verbose::progressBegin(verbosity, param["MSG"], total, 1, total); } else if(value == "STEP") { Verbose::progressStep(verbosity); } else if(value == "END") { Verbose::progressEnd(verbosity, param["MSG"]); } } else if (channel == KILLMSG){ Verbose::out(verbosity, "Unsuccessful Completion", true); m_Finished = true; m_SuccessfulRun = false; } else { APT_ERR_ABORT("Don't recognize channel: " + ToStr(channel)); } } affxparser/src/fusion/util/SocketTextHandler.h0000644000175200017520000000357314516003651022561 
0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef SOCKETTEXTHANDLER_H #define SOCKETTEXTHANDLER_H #include #include #include "util/SocketHandler.h" /** * Basic Socket Handler (Listener) that parses out * verbose and progress messages from a socket stream. */ class SocketTextHandler : public SocketHandler { public: /// Constructor SocketTextHandler(); /// Has the socket send the "completion" message? bool hasFinished(); /// was the run successful? bool successfulRun(); /** * Take a message and do whatever functionality this handler * implements from simply displaying message to * * @param type - What type (i.e. text, xml, ) of message is this message. * @param channel - What channel (i.e. verbose, progress, etc.) is this message. * @param msg - Data to be parsed out */ virtual void handleMessage(int type, int channel, const std::string &msg); protected: bool m_Finished; ///< Has the socket sent the "completed" signal? 
bool m_SuccessfulRun; // was the run succesful or aborted }; #endif /* SOCKETTEXTHANDLER_H */ affxparser/src/fusion/util/SocketUtil.cpp0000644000175200017520000000751714516003651021611 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifdef WIN32 #include #include #else #include #include #include #include #include #include #endif /* WIN32 */ #include "util/SocketUtil.h" #include "util/Err.h" #include "util/Convert.h" using namespace std; void SocketUtil::parseMsgFromBuffer(std::string &buffer, std::vector &messages) { messages.clear(); int expected = -1; string current = buffer; int pos = current.find(':'); while (pos != string::npos) { string eStr = current.substr(0, pos); expected = Convert::toInt(eStr); APT_ERR_ASSERT(expected >= 0, "Can't expect messages of negative length."); current = current.substr(pos + 1); if (current.size() >= expected) { messages.push_back(current.substr(0, expected)); } else { break; // incomplete message, leave it for later } current = current.substr(expected); pos = current.find(':'); } buffer = current; } void SocketUtil::checkForMsgs(std::vector &messages, int &conn) { messages.clear(); fd_set readFds; FD_ZERO(&readFds); 
FD_SET(conn, &readFds); struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 0; char buf[2048]; int nBytes = 0; if (select(conn + 1, &readFds, NULL, NULL, &tv) == -1) { APT_ERR_ABORT("select() failed."); } if (FD_ISSET(conn, &readFds)) { // we got one!! memset(buf, 0, 2048); if ((nBytes = recv(conn, buf, sizeof(buf), 0)) <= 0) { // got error or connection closed by client if (nBytes == 0) { // connection closed Verbose::out(1, "SocketUtil::checkForMsgss() - socket " + ToStr(conn) + " hung up"); CLOSE_SOCKET(conn); // bye! conn = -1; } else { APT_ERR_ABORT("checkForMsgs() error."); } } else { m_Buffer.append(buf); } } parseMsgFromBuffer(m_Buffer, messages); } int SocketUtil::sendAll(int socket, const char *buf, int toSend, int &sent) { int total = 0; // how many bytes we've sent int bytesLeft = toSend; // how many we have left to send int curSend = 0; while(total < toSend) { curSend = send(socket, buf+total, bytesLeft, 0); if (curSend == -1) { break; } total += curSend; bytesLeft -= curSend; } sent = total; // return number actually sent here int rv = -1; if (curSend != 0) rv = 0; return rv; } void SocketUtil::sendMsg(const std::string &msg, int &conn) { fd_set readFds; fd_set writeFds; FD_ZERO(&readFds); FD_SET(conn, &readFds); writeFds = readFds; struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 0; if (select(conn + 1, &readFds, &writeFds, NULL, &tv) == -1) { APT_ERR_ABORT("select() failed."); } if (FD_ISSET(conn, &writeFds)) { // we got one!! string msgSend = ToStr(msg.size()) + ":" + msg; int sent = 0; if (sendAll(conn, msgSend.c_str(), msgSend.size(), sent) == -1) { APT_ERR_ABORT("Error sending data to fd: " + ToStr(conn)); } } } affxparser/src/fusion/util/SocketUtil.h0000644000175200017520000000502114516003651021242 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
/**
 * Utility class for exchanging length-prefixed messages over a socket.
 *
 * The methods are virtual instance methods (not static): the object keeps
 * the bytes received so far in m_Buffer so that messages split across
 * several reads can be reassembled.
 */
class SocketUtil {

public:

  /// Virtual destructor: the class has virtual methods, so derived objects
  /// may be destroyed through a SocketUtil pointer.
  virtual ~SocketUtil() {}

  /**
   * Send a message over multiple socket send calls as necessary
   *
   * @param socket - Socket to write to
   * @param buf - Buffer to send over socket
   * @param toSend - How long is buffer
   * @param sent - Number characters actually sent
   *
   * @return Status code; sendMsg() treats -1 as a failure to send.
   */
  virtual int sendAll(int socket, const char *buf, int toSend, int &sent);

  /**
   * Tokenize the buffer into distinct messages
   *
   * @param buffer - String to break into individual messages; whatever is
   *                 not part of a complete message is left in it.
   * @param messages - Vector of messages to fill in
   */
  virtual void parseMsgFromBuffer(std::string &buffer, std::vector<std::string> &messages);

  /**
   * Try to read new messages from socket.
   *
   * @param messages - Vector to fill with messages from socket.
   * @param socket - socket to read messages from. Set to -1 if disconnected.
   */
  virtual void checkForMsgs(std::vector<std::string> &messages, int &socket);

  /**
   * Write a message into a socket.
   *
   * @param msg - Message to write to the socket.
   * @param socket - Socket that could be written to.
   */
  virtual void sendMsg(const std::string &msg, int &socket);

protected:
  /// Data coming over socket, including any partial messages
  std::string m_Buffer;
};
*/ TableFile::TableFile(char delim, char comment, bool useRowNames, bool useColNames) : m_Delim(delim), m_Comment(comment), m_UseRowNames(useRowNames), m_UseColNames(useColNames) { } /** * Destructor. */ TableFile::~TableFile() { unsigned int i; for(i = 0; i < m_MemToFree.size(); i++) { char *p = m_MemToFree[i]; delete [] p; } } /** * Look up the index of column with name specified. * @param colName - Name of column of interest. * @return - npos if not found, column index otherwise. */ unsigned int TableFile::colIndex(const std::string& colName) { assert(colName!=""); TMapConstIter iter = m_ColNameMap.find(colName); if(iter == m_ColNameMap.end()) return npos; return iter->second; } /** * Look up the index of row with name specified. * @param rowName - Name of row of interest. * @return - npos if not found, row index otherwise. */ unsigned int TableFile::rowIndex(const std::string& rowName) { assert(rowName!=""); TMapConstIter iter = m_RowNameMap.find(rowName); if(iter == m_RowNameMap.end()) return npos; return iter->second; } /** * Read data from file specified. * @param fileName - file to be read. * @return - true if successful. */ bool TableFile::open(const std::string& fileName) { std::vector requiredCols; return open(fileName, requiredCols); } /** * Read data from file specified. * @param fileName - file to be read. * @param requiredCols - column names that must be present. * @return - true if successful. */ bool TableFile::open(const std::string& fileName, const std::vector &requiredCols) { assert(fileName!=""); vector found(requiredCols.size()); vector words; char *name = NULL; int rowOffset = 0; unsigned int i = 0; int count = 0; RowFile rf; rowOffset = (m_UseRowNames == true) ? 1 : 0; std::string tmp_unc_path=Fs::convertToUncPath(fileName); rf.open(tmp_unc_path.c_str()); /* Read in any header. */ rf.readHeader(rf, m_Header, m_HeaderLines); /* Read in column names. 
*/ if(!(rf.nextRow(words)) || words.empty()) Err::errAbort("Nothing after header in file: "+FS_QUOTE_PATH(tmp_unc_path)); if(m_UseColNames) { /* Read in the column header names. */ for(i = 0; i < words.size(); i++) { name = Util::cloneString(words[i].c_str()); //m_MemToFree.push_back(name); /* Was this a required column? */ for(unsigned int requiredIx = 0; requiredIx < requiredCols.size(); requiredIx++) { if(requiredCols[requiredIx] == name) { found[requiredIx] = true; } } if(m_ColNameMap.find(name) != m_ColNameMap.end()) Verbose::out(1, "Warning: Duplicate name: " + words[i] + " in column headers from file: " + Fs::basename(fileName)); m_ColNameMap[name] = m_ColNames.size(); m_ColNames.push_back(words[i]); } /* Check to make sure we found the required column names. */ for(unsigned int requiredIx = 0; requiredIx < requiredCols.size(); requiredIx++) { if(found[requiredIx] == false) { Err::errAbort("Didn't find required column name: '" + requiredCols[requiredIx] + "' in file: " + fileName); } } } else { rf.reuseLine(); } /* Read in each row. */ while(rf.nextRow(words)) { string nameSpoof; // Spoof the column names if not being read. if(!m_UseColNames && m_Data.empty()) { unsigned int colIx = 0; if(m_UseRowNames) colIx = 1; // WAS for (colIx = colIx; ...) BUT that gives a compile // warning of an unused/unnecessary expression. 
for(; colIx < words.size(); colIx++) { m_ColNames.push_back(ToStr(colIx)); } } if(words.size() - rowOffset != m_ColNames.size()) Err::errAbort("Expecting " + ToStr(m_ColNames.size()) + " words but got " + ToStr(words.size() - rowOffset) + " at line " + ToStr(rf.getCurrentLineNumber())); assert(words.size() > 0); if(m_UseRowNames) { name = Util::cloneString(words[0].c_str()); } else { nameSpoof = Convert::toString(count); name = Util::cloneString(nameSpoof.c_str()); } m_MemToFree.push_back(name); if(m_RowNameMap.find(name) != m_RowNameMap.end()) Err::errAbort("Duplicate name: " + words[0] + " in row names."); m_RowNameMap[name] = m_Data.size(); if(m_UseRowNames) { m_RowNames.push_back(words[0]); words.erase(words.begin()); } else { m_RowNames.push_back(nameSpoof); } m_Data.push_back( words ); count++; } rf.close(); return true; } /** * Write out words in vector delimited by specified character. * @param out - stream to write to. * @param data - vector of data to write. * @param delim - delimiter to separate words with. */ void TableFile::writeVector(std::ostream &out, std::vector data, char delim) { unsigned int i = 0; for( i = 0; i < data.size() - 1; i++) { out << data[i]; out.put(delim); } if(data.size() > 0) out << data[data.size()-1]; out << endl; } /** * Write data to file specified. * @param fileName - name of file to write to. * @return - true if successful. */ bool TableFile::write(const std::string& fileName) { assert(fileName!=""); if(m_Data.empty()) Err::errAbort("TableFile::write() - No data do write."); ofstream out(fileName.c_str()); unsigned int i = 0; RowFile::writeHeader(out, m_HeaderLines); writeVector(out, m_ColNames, m_Delim); for(i = 0; i < m_Data.size(); i++) { out << m_RowNames[i]; out.put(m_Delim); writeVector(out, m_Data[i], m_Delim); } return true; } /** * Rip the column out of a file by name filling in the data in colVec. * * @param fileName - File to read column from. * @param colVec - Strings to be filled in from column. 
* @param colName - Name of column to be read in. * @param skipCols - Number of (non-commented) rows to skip before header. * @param unique - If true check to make sure every entry is unique. * * @return true if successful false otherwise. */ bool TableFile::columnFromFile(const std::string& fileName, std::vector &colVec, const std::string& colName, unsigned int skipCols, bool unique) { RowFile rf; vector words; rf.open(fileName.c_str()); unsigned int colIx = npos; /* Skip rows. */ while(skipCols > 0 && rf.nextRow(words)) { skipCols--; } /* Make sure there is something after the skipped entries. */ if(!(rf.nextRow(words)) || words.empty()) Err::errAbort("Nothing after header in file: " + string(fileName)); for(unsigned int i = 0; i < words.size(); i++) { if(words[i] == colName) { if(colIx == npos) { colIx = i; } else { Verbose::out(1, "Warning: column name: " + ToStr(colName) + " occurs multiple times in: " + ToStr(fileName) + " using first column."); } } } /* Did we find a column with that name? */ if(colIx == npos) return false; return columnFromRowFile(rf, colVec, colIx, unique); } /** * Rip the column out of a file by index filling in the data in colVec. * * @param fileName - File to read column from. * @param colVec - Strings to be filled in from column. * @param colIx - Column index to read. * @param skipCols - Number of (non-commented) rows to skip before header. * @param unique - If true check to make sure every entry is unique. * * @return true if successful false otherwise. */ bool TableFile::columnFromFile(const std::string& fileName, std::vector &colVec, unsigned int colIx, unsigned int skipCols, bool unique) { RowFile rf; vector words; rf.open(fileName.c_str()); /* Skip rows. */ while(skipCols > 0 && rf.nextRow(words)) { skipCols--; } /* Make sure there is something there. 
*/ if(!(rf.nextRow(words)) || words.empty()) Err::errAbort("Nothing after header in file: " + string(fileName)); rf.reuseLine(); return columnFromRowFile(rf, colVec, colIx, unique); } /** * Rip a column out of an open RowFile. * * @param rf - RowFile to read from. * @param colVec - Strings to be filled in with column's data. * @param colIx - Column index to read. * @param unique - If true check to make sure every entry is unique. * * @return true if successful false otherwise. */ bool TableFile::columnFromRowFile(RowFile &rf, std::vector &colVec, unsigned int colIx, bool unique) { map seen; vector words; string fileName = rf.getFileName(); /* Read entries. */ while(rf.nextRow(words)) { if(colIx >= words.size()) { Err::errAbort("Trying to read column: " + ToStr(colIx) + " from row with only: " + ToStr(words.size()) + " columns at line: " + ToStr(rf.getCurrentLineNumber()) + " in file: " + fileName); } if(unique) { if(seen.find(words[colIx]) != seen.end()) { Err::errAbort("Entry: '" + words[colIx] + "' has already been seen in file: " + fileName + " in column " + ToStr(colIx) + "."); } else { seen[words[colIx]] = true; } } colVec.push_back(words[colIx]); } return true; } affxparser/src/fusion/util/TableFile.h0000644000175200017520000002062414516003651021011 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
/**
 * TableFile - File for reading and using files that have delimited
 * rows of data. Specifically for R table files and the
 * like.
 *
 * The table is held in memory as strings; rows and columns can be
 * addressed by index or looked up by name.
 *
 * NOTE(review): in this copy of the file the template arguments of several
 * declarations (std::vector, std::map) are missing. They look like they
 * were lost when the file was extracted -- restore them from the upstream
 * source rather than guessing.
 */
class TableFile {

public:

  /// Error value indicating not found.
  static const unsigned int npos = UINT_MAX;

  /**
   * Constructor.
   * @param delim - Character to use for word separator. i.e. ',' or '\\t'
   * @param comment - Character that comment lines begin with.
   * @param useRowNames - Is first column the names of the rows?
   * @param useColNames - Is the first row the column headers?
   */
  TableFile(char delim = '\t', char comment = '#', bool useRowNames = false, bool useColNames = true);

  /**
   * Destructor. Frees the C-strings recorded in m_MemToFree.
   */
  ~TableFile();

  /**
   * Read data from file specified.
   * @param fileName - file to be read.
   * @return - true if successful.
   */
  bool open(const std::string& fileName);

  /**
   * Read data from file specified.
   * @param fileName - file to be read.
   * @param requiredCols - column names that must be present.
   * @return - true if successful.
   */
  bool open(const std::string& fileName, const std::vector &requiredCols);

  /**
   * Write data to file specified.
   * @param fileName - name of file to write to.
   * @return - true if successful.
   */
  bool write(const std::string& fileName);

  /**
   * Return number of columns in data.
   * @return - number of column names held.
   */
  size_t numCols() { return m_ColNames.size(); }

  /**
   * Return number of rows in data.
   * @return - number of row names held.
   */
  size_t numRows() { return m_RowNames.size(); }

  /**
   * Look up the index of column with name specified.
   * @param colName - Name of column of interest.
   * @return - npos if not found, column index otherwise.
   */
  unsigned int colIndex(const std::string& colName);

  /**
   * Look up the index of row with name specified.
   * @param rowName - Name of row of interest.
   * @return - npos if not found, row index otherwise.
   */
  unsigned int rowIndex(const std::string& rowName);

  /**
   * Return data at the position specified in table.
   * Uses at() for both lookups, so an out-of-range index throws.
   * @param rowIx - row number.
   * @param colIx - column number.
   * @return - string representation of data.
   */
  std::string &getData(unsigned int rowIx, unsigned int colIx) { return m_Data.at(rowIx).at(colIx); }

  /**
   * Return column name at specified index (range-checked via at()).
   * @param colIx - column number of interest.
   * @return - reference to column name at index of interest.
   */
  std::string &getColName(unsigned int colIx) { return m_ColNames.at(colIx); }

  /**
   * Return row name at specified index (range-checked via at()).
   * @param rowIx - Row number of interest.
   * @return - reference to row name at index of interest.
   */
  std::string &getRowName(size_t rowIx) { return m_RowNames.at(rowIx); }

  /**
   * Write out words in vector delimited by specified character.
   * @param out - stream to write to.
   * @param data - vector of data to write (passed by value).
   * @param delim - delimiter to separate words with.
   */
  static void writeVector(std::ostream &out, std::vector data, char delim);

  /**
   * Rip the column out of a file by name filling in the data in colVec.
   *
   * @param fileName - File to read column from.
   * @param colVec - Strings to be filled in from column.
   * @param colName - Name of column to be read in.
   * @param skipCols - Number of (non-commented) rows to skip before header.
   * @param unique - If true check to make sure every entry is unique.
   *
   * @return true if successful false otherwise.
   */
  static bool columnFromFile(const std::string& fileName, std::vector &colVec,
                             const std::string& colName, unsigned int skipCols, bool unique=false);

  /**
   * Rip the column out of a file by index filling in the data in colVec.
   *
   * @param fileName - File to read column from.
   * @param colVec - Strings to be filled in from column.
   * @param colIx - Column index to read.
   * @param skipCols - Number of (non-commented) rows to skip before header.
   * @param unique - If true check to make sure every entry is unique.
   *
   * @return true if successful false otherwise.
   */
  static bool columnFromFile(const std::string& fileName, std::vector &colVec,
                             unsigned int colIx, unsigned int skipCols, bool unique=false);

  /**
   * Rip a column out of an open RowFile.
   *
   * @param rf - RowFile to read from.
   * @param colVec - Strings to be filled in with column's data.
   * @param colIx - Column index to read.
   * @param unique - If true check to make sure every entry is unique.
   *
   * @return true if successful false otherwise.
   */
  static bool columnFromRowFile(RowFile &rf, std::vector &colVec,
                                unsigned int colIx, bool unique=false);

  /**
   * @brief Change state to use row names.
   */
  inline void setUseRowNames() { m_UseRowNames = true; }

  /**
   * @brief Are we using row names?
   * @return true if using row names, false otherwise.
   */
  inline bool getUseRowNames() { return m_UseRowNames; }

  /**
   * Set the flag to look for column names (i.e. column headers).
   * @param use - should column names be used
   */
  inline void setUseColNames(bool use) { m_UseColNames = use; }

  /**
   * Set the flag to look for row names (i.e. row ids).
   * @param use - should row names be used
   */
  inline void setUseRowNames(bool use) { m_UseRowNames = use; }

  /**
   * @brief Get the value of a layout key=value header pair.
   * Aborts via Err::errAbort() if the key is not present in the header.
   * @param key String to lookup value of.
   *
   * @return value of key/value pair.
   */
  const std::vector &getHeaderValue(const std::string &key) {
    std::map >::iterator iter;
    iter = m_Header.find(key);
    if(iter == m_Header.end())
      Err::errAbort("Don't recognize header key: " + key);
    return iter->second;
  }

private:

  char m_Delim;        ///< Delimiter in table.
  char m_Comment;      ///< Comment character for data.
  bool m_UseRowNames;  ///< Should row names be expected
  bool m_UseColNames;  ///< Should column names be expected
  /// Header parameters from %thisKey=thisParam
  std::map > m_Header;
  /// All header lines for easy recovery.
  std::vector m_HeaderLines;
  /// Name of columns.
  std::vector m_ColNames;
  /// Name of rows.
  std::vector m_RowNames;
  /// Core table of words, one inner vector per row.
  std::vector< std::vector > m_Data;
  /** Iterator to walk through the map. */
  typedef std::map::iterator TMapIter;
  /** Iterator to walk through the map; note this is a const iterator
      object, not a const_iterator. */
  typedef const std::map::iterator TMapConstIter;
  /** Maps column name to column index. */
  std::map m_ColNameMap;
  /** Maps row name to row index. */
  std::map m_RowNameMap;
  /** C-strings owned by this object; released with delete [] in the destructor. */
  std::vector m_MemToFree;
};
/**
 * Class for testing that two text files are equal, except
 * for line endings.
 *
 * Two modes are available, chosen by the constructor used: skip a fixed
 * number of leading lines, or skip a header made of the leading lines that
 * start with a given delimiter string.
 */
class TextFileCheck : public RegressionCheck {

public:

  /**
   * Constructor for the "skip a fixed number of lines" mode.
   *
   * @param generatedFile File generated by application.
   * @param goldFile Comparison file, assumed to be correct.
   * @param skipLines Number of leading lines that are not compared.
   */
  TextFileCheck(const std::string &generatedFile, const std::string &goldFile,
                const unsigned int skipLines)
    : m_GeneratedFile(generatedFile), m_GoldFile(goldFile), m_SkipLines(skipLines) {
    // m_Name is not declared in this class; presumably inherited from RegressionCheck.
    m_Name = Fs::basename(generatedFile);
    // Empty delimiter switches header detection off in check().
    m_HeaderDelimiter = "";
  }

  /**
   * Constructor for the "skip the header" mode.
   *
   * @param generatedFile File generated by application.
   * @param goldFile Comparison file, assumed to be correct.
   * @param headerDelimiter Prefix that marks a line as a header line.
   */
  TextFileCheck(const std::string &generatedFile, const std::string &goldFile,
                const std::string &headerDelimiter)
    : m_GeneratedFile(generatedFile), m_GoldFile(goldFile), m_SkipLines(0), m_HeaderDelimiter(headerDelimiter) {
    m_Name = Fs::basename(generatedFile);
  }

  /**
   * Check that the two files are the same.
   *
   * @param errorMsg Error message generated if the test fails.
   * @return bool Returns true if files the same, else false.
   */
  bool check(std::string& errorMsg) {
    // Open files.
    LineFile generatedStream;
    generatedStream.open(m_GeneratedFile, false);
    if(! generatedStream.is_open()) {
      errorMsg = "Unable to open generated file " + m_GeneratedFile;
      return false;
    }
    LineFile goldStream;
    goldStream.open(m_GoldFile, false);
    if(! goldStream.is_open()) {
      errorMsg = "Unable to open gold file " + m_GoldFile;
      return false;
    }

    unsigned int lineCount = 0;
    // Header handling is only active when a delimiter was supplied.
    bool inHeader = !m_HeaderDelimiter.empty();
    bool goldPastHeader = false;
    bool genPastHeader = false;
    std::string goldLine, generatedLine;
    while(! goldStream.eof() && ! goldStream.fail()) {
      // While in the header, a file that has already produced its first
      // non-header line is not read again until the other file has caught
      // up, so the two headers may differ in length.
      if(!inHeader || !goldPastHeader)
        goldStream.getline(goldLine);
      if(!inHeader || !genPastHeader)
        generatedStream.getline(generatedLine);
      if(generatedStream.eof() && ! goldStream.eof()) {
        errorMsg = "The generated file, " + m_GeneratedFile +
          ", has fewer lines than the gold file, " + m_GoldFile;
        return false;
      }
      if(inHeader) {
        // A line that does not start with the delimiter ends that file's header.
        if(!goldPastHeader &&
           ((goldLine.size() < m_HeaderDelimiter.size()) ||
            (goldLine.substr(0, m_HeaderDelimiter.size()) != m_HeaderDelimiter))) {
          goldPastHeader = true;
        }
        if(!genPastHeader &&
           ((generatedLine.size() < m_HeaderDelimiter.size()) ||
            (generatedLine.substr(0, m_HeaderDelimiter.size()) != m_HeaderDelimiter))) {
          genPastHeader = true;
        }
        // Still in the header until both files are past theirs.
        inHeader = !goldPastHeader || !genPastHeader;
      }
      // Skip header lines which need not be equal.
      // lineCount is incremented on every pass, header passes included.
      if((++lineCount > m_SkipLines) && !inHeader) {
        if(goldLine != generatedLine) {
          errorMsg = "Mismatch reading generated file " + m_GeneratedFile +
            ":\ngold line: '" + goldLine + "'\ngenerated line: '" + generatedLine + "'";
          return false;
        }
      }
    }
    // The two files should reach eof at the same time.
    if(! generatedStream.eof()) {
      errorMsg = "The generated file, " + m_GeneratedFile +
        ", has more lines than the gold file, " + m_GoldFile;
      return false;
    }
    return true;
  }

private:
  /// Name of file generated by application being tested.
  std::string m_GeneratedFile;
  /// Name of file assumed to be correct.
  std::string m_GoldFile;
  /// Number of leading lines that are not compared.
  const unsigned int m_SkipLines;
  /// Prefix marking header lines; empty when header detection is off.
  std::string m_HeaderDelimiter;
};
/// The process-wide factory; NULL until first requested and again after
/// GlobalTmpFileFactoryFree(). Declared static to scope it to this file.
static TmpFileFactory* global_tmpfilefactory;

/// @brief     Returns a pointer to the global tmpfilefactory,
///            allocating it on first use (or on first use after it was freed).
/// @return    The global tmpfilefactory
TmpFileFactory* GlobalTmpFileFactory()
{
  if (global_tmpfilefactory==NULL) {
    global_tmpfilefactory=new TmpFileFactory();
  }
  // Kept from the original; only reachable if operator new can return NULL.
  if (global_tmpfilefactory==NULL) {
    Err::errAbort("GlobalTmpFileFactory: Unable to allocate.");
  }
  return global_tmpfilefactory;
}

/// @brief     Cleans up and frees the global tmpfilefactory.
///            Safe to call more than once, and the factory may be
///            requested again afterwards.
void GlobalTmpFileFactoryFree()
{
  if (global_tmpfilefactory!=NULL) {
    global_tmpfilefactory->cleanUp();
    delete global_tmpfilefactory;
    // Reset the pointer: without this the next GlobalTmpFileFactory() call
    // would hand out the freed object, and a second Free would delete it again.
    global_tmpfilefactory=NULL;
  }
}
/// @brief Allocate temporary filenames and arrange to remove them when done.
///
/// NOTE(review): in this copy of the file the template arguments of the
/// std::vector declarations below are missing. They look like they were
/// lost when the file was extracted -- restore them from the upstream source.
class TmpFileFactory {
public:
  /// filenames we have allocated
  std::vector m_tmpfilenames;
  /// directory names we have allocated
  std::vector m_tmpdirnames;
  /// the directory in which the files will be allocated.
  std::string m_tmpdir;
  /// the default prefix for the file and dir names.
  std::string m_prefix;
  /// the default suffix for the file names.
  std::string m_suffix;

  /// Controls the behavior to help debugging (see setDebug()).
  int m_opt_debug;
  /// When set produces extra output for debugging (see setVerbose()).
  int m_opt_verbose;

  /// Counter shared by all factories; advanced by incFileCounter().
  static int m_fileCount;
  /// Limit on the number of attempts made to find an unused filename.
  int m_maxTrialCnt;

  /// @brief     The default constructor.
  TmpFileFactory();
  /// @brief     The destructor; removes the tmp files via cleanUp().
  virtual ~TmpFileFactory();

  /// @brief     Remove all the files and directories we have generated.
  ///            Called from ~TmpFileFactory()
  void cleanUp();

  /// @brief     Increment the file counter and return its new value.
  /// @return    the next value.
  static int incFileCounter();

  /// @brief     Sets the directory in which the tmp files will be allocated.
  /// @param     tmpdir
  void setTmpdir(const std::string& tmpdir);
  /// @brief     Generate and create a directory for tmp files.
  /// @return    The directory path
  std::string makeTmpDir();

  /// @brief     Set the default prefix for tmp names.
  /// @param     prefix    The prefix to use.
  void setPrefix(const std::string& prefix);
  /// @brief     Sets the default suffix for tmp file names.
  /// @param     suffix
  void setSuffix(const std::string& suffix);

  /// @brief     Generates a string which is unique for this run of the program.
  /// @return    A 'unique' string.
  std::string genUniqueString();

  /// @brief     Generate a name which does not exist in the filesystem.
  ///            Does NOT remember to delete the filename later.
  /// @param     prefix    prefix for the unique string.
  /// @param     suffix    suffix for the unique string.
  /// @return    the unique pathname (dir+prefix+unique+suffix)
  std::string genFilename_basic(const std::string& prefix,const std::string& suffix);

  /// @brief     Generates a unique filename with user supplied prefix and suffix.
  ///            Remembers to delete it later.
  /// @param     prefix    user override to prefix
  /// @param     suffix    user override to suffix
  /// @return    a unique pathname
  std::string genFilename(const std::string& prefix,const std::string& suffix);
  /// @brief     Generates a unique filename with the default prefix and suffix.
  ///            Remembers to delete it later.
  /// @return    a unique file name.
  std::string genFilename();
  /// @brief     generate a vector of filenames in one call.
  /// @param     cnt       The number of filenames to generate.
  /// @return    the vec of filenames.
  std::vector genFilenames(int cnt);

  /// @brief     Generates and creates a unique directory
  /// @return    the pathname to the directory.
  std::string genDirname();

  /// @brief     Remember this filename to delete later.
  /// @param     filename  What to remove when "cleanUp()" is called.
  void rememberToRemove(const std::string& filename);
  /// @brief     Remember this dirname to delete later.
  /// @param     dirname   What to remove when "cleanUp()" is called.
  void rememberToRemoveDir(const std::string& dirname);

  /// @brief     Sets the debugging options
  ///            0=None
  ///            1=Don't remove the tmp files, leave them for debugging.
  /// @param     level
  void setDebug(int level);
  /// @brief     Sets the verbose level
  ///            0=No output
  ///            1 or more = logs more actions.
  /// @param     level
  void setVerbose(int level);

  /// @brief     Get the suggested tmp directory for this system.
  /// @return    The suggested path name.
  std::string getSystemTmpDir();
};
*/ // #include "util/Util.h" // #include "calvin_files/utils/src/StringUtils.h" #include "util/Convert.h" #include "util/Err.h" #include "util/Fs.h" #include "util/RowFile.h" #include "util/TableFile.h" #include "util/Verbose.h" // #include #include #include #include #include #include #include #include #include #include #include #include // silences some warnings. #include "portability/apt-no-warnings.h" #define POSIX_OPEN open #define POSIX_CLOSE close #define POSIX_MKDIR mkdir #ifdef WIN32 #include #include #include #include #include #include #include #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) #define BUFFSIZE 10000 // #ifndef POSIX_OPEN #define POSIX_OPEN _open #endif #ifndef POSIX_CLOSE #define POSIX_CLOSE _close #endif #ifndef POSIX_MKDIR #define POSIX_MKDIR _mkdir #endif #else #include #endif /* WIN32 */ // OS stuff for memInfo, getAvailableDiskSpace, isSameVolume #ifdef __APPLE__ #include #include #include #endif #ifdef __linux__ #include #include #endif #ifdef __sun__ #include #include #endif #ifdef WIN32 #include void sleep(unsigned int ms) { clock_t target = ms + clock(); while (target > clock()); } #endif /*WIN32*/ using namespace std; /** * Create a copy of a string. Free this with delete [] (or freezArray()) when * done. * @param s - c-string to be copied. * @return char * newly allocated c-string. 
*/ char *Util::cloneString(const char *s) { char *copy = NULL; if(s == NULL) return NULL; size_t length = strlen(s); // copy = new ( sizeof(char) * (length+1) ); copy = new char[length + 1]; // strncpy is deprecated // strncpy(copy, s, length+1); memcpy(copy,s,length+1); return copy; } /* //todo vliber version char *Util::cloneString(const string &s) { char *copy = NULL; if(s.c_str() == NULL) return NULL; size_t length = strlen(s.c_str()); copy = new char[length + 1]; // strcpy(copy,length+1,s.c_str()); strcpy(copy,s.c_str()); //malloc(s.length()+1); //strcpy(copy,s.length()+1,s.c_str()); return copy; } */ bool Util::stringEndsWith(const std::string& str,const std::string& end) { std::string::const_reverse_iterator s_i=str.rbegin(); std::string::const_reverse_iterator s_end=str.rend(); std::string::const_reverse_iterator e_i=end.rbegin(); std::string::const_reverse_iterator e_end=end.rend(); // see if the whole thing end is there. while (e_i!=e_end) { if ((s_i==s_end) || // ran out of input string ((*e_i)!=(*s_i)) // mismatch ) { return false; // didnt end with 'end'. } // advance (backwards!) e_i++; s_i++; } // 'end' was at the end of the string. return true; } bool Util::endsWithStr(const std::string& str,const std::string& ending,int charsAtEnd) { if (ending.size()>str.size()) { return false; } std::string tmpstr=str.substr(str.size()-(ending.size()+charsAtEnd),ending.size()); return tmpstr==ending; } bool Util::endsWithStr(const std::string& str,const std::string& ending) { return endsWithStr(str,ending,0); } /** * @brief Chop off the last character if it is a path separator. * windows stat() can't handle having it there. * @param s - string to have '/' or '\' chopped off if last. */ void Util::chompLastIfSep(std::string &s) { string::size_type i = s.rfind(Fs::osPathSep()); if(i != string::npos && i == s.length() -1) s.erase(i); } /** * Chop the last suffix (as defined by '.') from a string * @param - string to chop * @param - delimiter, default '.' 
*/ std::string Util::chopSuffix(const std::string& s, char d) { string::size_type pos = s.rfind(d); if(pos != string::npos) { return s.substr(0,pos); } return s; } #ifdef WIN32 // Windows doesn't seem to have these defined, so we'll define them // here for utility. Note that with the windows _stat() function all // users have id 0 and group 0. #ifndef __MINGW32__ #define S_IRUSR 00400 //owner has read permission #define S_IWUSR 00200 //owner has write permission #define S_IXUSR 00100 ///owner has execute permission #endif #define S_IRWXG 00070 //mask for group permissions #define S_IRGRP 00040 //group has read permission #define S_IWGRP 00020 //group has write permission #define S_IXGRP 00010 //group has execute permission #define S_IRWXO 00007 //mask for permissions for others (not in group) #define S_IROTH 00004 //others have read permission #define S_IWOTH 00002 //others have write permisson #define S_IXOTH 00001 // others have execute permission #endif void Util::breakByString(const std::string &s, const std::string &delim, std::vector &words) { words.clear(); int curPos = 0; APT_ERR_ASSERT(delim.length() > 0, "delim must be non-empty."); while(1) { size_t pos = s.find(delim,curPos); if (pos == string::npos) { words.push_back(s.substr(curPos, s.size())); break; } string sub = s.substr(curPos, pos - curPos); words.push_back(sub); curPos = pos + delim.size(); if ( curPos >= s.length() ) { break; } } } /** * Chop up a string into a vector of words. * * @param s - string of interest. * @param delim - delimiter to split on. * @param words - vector to put words into, will be cleared then filled. */ /// @todo shouldnt this be "split"? void Util::chopString(const std::string& s,const char delim,std::vector& words) { string::size_type len = 0, start = 0, next = 0; words.clear(); len = s.length(); while(start < len) { next = s.find(delim, start); if(next == string::npos) { next = s.size(); // entire string. 
} words.push_back(s.substr(start, next - start)); start = next+1; } } /** * Chop up a string into a vector of words. * * @param s - string of interest. * @param delims - delimiters to split on. The split will occur at any character * - among those present in the string. * @param words - vector to put words into, will be cleared then filled. */ void Util::chopString(const std::string& s, const char* delims, std::vector& words) { string::size_type len = 0, start = 0, next = 0; words.clear(); len = s.length(); while(start < len) { next = s.find_first_of(delims, start); if(next == string::npos) { next = s.size(); // entire string. } words.push_back(s.substr(start, next - start)); start = next+1; } } /** * Check each entry in two matrices to see if they are the same. If * doing 'match-rows' we will attempt to find the matching row by the * row name. * * @param targetFile - File to read target matrix from. * @param queryFile - File to read the query matrix from. * @param colSkip - How many of initial columns to ignore (i.e. row names) * @param rowSkip - How many of initial rows to ignore (i.e. column header) * @param epsilon - What is the tolerance of difference. * i.e. if q[i][j] - t[i][j] >= epsilon then there is a difference. * @param printMismatch - Should we print out the cases where difference is >= epsilon * @param matchRows - Should we try to match the rows based on the row identifiers. * @param fraction - What fractional difference is tolerated, test for value equivalence (not used by default). * i.e. if q[i][j] - t[i][j] < fraction*max( |q[i][j]|, |t[i][j]| ) * * @return - Number of differences >= epsilon found. 
*/ int Util::matrixDifferences(const std::string& targetFile, const std::string& queryFile, int colSkip, int rowSkip, double epsilon, bool printMismatch, bool matchRows, double fraction, int printMismatchMax ) { vector< vector > qMatrix, tMatrix; unsigned int rowIx = 0, colIx = 0; unsigned int numCol = 0, numRow = 0; bool same = true; map qMatrixMap; vector qMatrixRows, tMatrixRows; unsigned int diffCount = 0; unsigned int rowDiffCount = 0; double maxDiff = 0; std::string tmp_target_name=Fs::convertToUncPath(targetFile); Verbose::out(2, "Reading in file: "+tmp_target_name); RowFile::matrixFromFile(tmp_target_name, qMatrix, rowSkip, colSkip); std::string tmp_query_name=Fs::convertToUncPath(queryFile); Verbose::out(2, "Reading in file: " +tmp_query_name); RowFile::matrixFromFile(tmp_query_name, tMatrix, rowSkip, colSkip); /* If we are matching by rows grab the first column from each file and assume it is the unique row names. Create a map from the names to the indexes for qMatrix */ if (matchRows) { colIx = 0; Verbose::out(2, "Reading in rownames."); // @todo these are named backwards. (q/t) TableFile::columnFromFile(tmp_target_name, qMatrixRows, colIx, rowSkip, true); TableFile::columnFromFile(tmp_query_name, tMatrixRows, colIx, rowSkip, true); // for(rowIx = 0; rowIx < qMatrixRows.size(); rowIx++) { if(qMatrixMap.find(qMatrixRows[rowIx]) != qMatrixMap.end()) { Err::errAbort("Duplicate row names: " + qMatrixRows[rowIx] + " in matrix 1"); } qMatrixMap[qMatrixRows[rowIx]] = rowIx; } } Verbose::out(2,"Looking for differences."); if((!matchRows && qMatrix.size() != tMatrix.size()) || qMatrix[0].size() != tMatrix[0].size()) { Err::errAbort("Matrices are different sizes, not comparable."); } numRow = tMatrix.size(); numCol = tMatrix[0].size(); for(rowIx = 0; rowIx < numRow; rowIx++) { bool rowDiff = false; for(colIx = 0; colIx < numCol; colIx++) { int qMatRowIx = rowIx; /* If doing match rows, look up the correct row based on row name. 
*/ if(matchRows) { if(qMatrixMap.find(tMatrixRows[rowIx]) == qMatrixMap.end() ) Err::errAbort("Can't find rowname: " + tMatrixRows[rowIx] + " in matrix 1"); else qMatRowIx = qMatrixMap[tMatrixRows[rowIx]]; } double val = qMatrix[qMatRowIx][colIx] - tMatrix[rowIx][colIx]; maxDiff = Max(maxDiff, fabs(val)); // allowed absolute difference from fractional tolerance (zero by default) double epsilon2 = fraction*Max( fabs(qMatrix[qMatRowIx][colIx]), fabs(tMatrix[rowIx][colIx]) ); // absolute difference is acceptable if it satisfies either (least restrictive) tolerance bool failed = (fabs(val) > Max(epsilon,epsilon2)); bool okFinite = true; okFinite &= isFinite(qMatrix[qMatRowIx][colIx]); okFinite &= isFinite(tMatrix[rowIx][colIx]); if(!okFinite) Verbose::out(2, "Non-finite floating point numbers at row: " + ToStr(rowIx) + " column: " + ToStr(colIx)); if(failed || !okFinite) { same = false; rowDiff = true; diffCount++; // report mismatches up to maximum count, if set, with warning if maximum count exceeded. if( printMismatch && ((int)diffCount<=printMismatchMax || printMismatchMax<0) ) { Verbose::out(2, "row: " + ToStr(rowIx) + " col: " + ToStr(colIx) + " (" + ToStr(qMatrix[qMatRowIx][colIx]) + " vs. 
" + ToStr( tMatrix[rowIx][colIx]) + ")" + " Diff: " + ToStr(val)); } if( printMismatch && diffCount==printMismatchMax+1 && printMismatchMax>0 ) { Verbose::out(2, "Number of differences exceeds maximum number (" + ToStr(printMismatchMax) + ") to report."); } } } if(rowDiff) rowDiffCount++; } if(maxDiff > 0) { Verbose::out(2, "Max difference is: " + ToStr(maxDiff)); } if(same) Verbose::out(2,"Same."); else { unsigned int total = numRow * numCol; float percent = (float) rowDiffCount / numRow * 100; Verbose::out(2, "Different in " + ToStr(rowDiffCount) + " of " + ToStr(numRow) + " ( " + ToStr(percent) + "% ) rows."); percent = (float) diffCount / total * 100; Verbose::out(2, "Different at " + ToStr(diffCount) + " of " + ToStr(total) + " ( " + ToStr(percent) + "% ) values."); } return diffCount; } void Util::replaceString(std::string &s, const std::string &from, const std::string &to) { vector words; breakByString(s, from.c_str(), words); std::ostringstream ss; ss << words[0]; for (int i = 1; i < words.size(); i++) { ss << to; ss << words[i]; } s = ss.str(); } /** * Schrage's algorithm for generating random numbers in 32 bits. * @param ix - pointer to integer seed, cannot be zero. */ int32_t Util::schrageRandom(int32_t *ix) { int32_t a = 16807, p = 2147483647, xhi = 0, xlo = 0; int32_t rand = 0; int32_t r = p % a; int32_t q = p / a; Err::check(*ix > 0, "Error: Util::schrageRandom() - Cannot seed with 0"); /* Break into high and low bits to avoid overflow. */ xhi = (*ix) / q; xlo = (*ix) - xhi*q; /* Combine high and low. */ (*ix) = a * xlo - r * xhi; if((*ix) < 0) { (*ix) = (*ix) + p; } rand = (*ix); return rand; } std::string Util::asMB(uint64_t x) { return ToStr(x/MEGABYTE) + "MB"; } #ifdef WIN32 #define memInfo_defined 1 /// @brief Query windows for its memInfo. /// @param free The amount of free memory. (bytes) /// @param total The physical memory installed (bytes) /// @param swapAvail The amount of swap avail. 
(bytes) /// @param memAvail Suggested amount of memory to use. /// @return true on success bool memInfo_win32(uint64_t &free, uint64_t &total, uint64_t &swapAvail, uint64_t& memAvail) { MEMORYSTATUSEX statex; statex.dwLength = sizeof(statex); if(GlobalMemoryStatusEx(&statex) == 0) { Err::errAbort("Util::memInfo() - Could not determine memory usage with: GlobalMemoryStatusEx()."); } free = statex.ullAvailPhys; total = statex.ullTotalPhys; swapAvail = statex.ullAvailVirtual - statex.ullAvailPhys; memAvail = (uint64_t)(free * 0.90); return true; } #endif #ifdef __linux__ #define memInfo_defined 1 /// @brief Read and parse the contents of /proc/meminfo on 2.4 and 2.6 systems. /// @param proc_meminfo_filename File to parse (changeable for testing) /// @param free The amount of free memory. (bytes) /// @param total The physical memory installed (bytes) /// @param swapAvail The amount of swap avail. (bytes) /// @param memAvail Suggested amount of memory to use. /// @return true on success bool memInfo_linux(std::string proc_meminfo_filename, uint64_t &free, uint64_t &total, uint64_t &swapAvail, uint64_t& memAvail) { std::ifstream proc_meminfo; std::string key; std::string line; //char line_buf[1024]; //std::istringstream line_stm(line_buf,sizeof(line_buf)); uint64_t val; std::string units; uint64_t buffersUsed=0, cached=0; free=total=swapAvail=memAvail=0; // 2.4 // total: used: free: shared: buffers: cached: // Mem: 4097478656 2765283328 1332195328 0 219951104 2328576000 // Swap: 3224301568 0 3224301568 // 2.6 // MemTotal: 4059404 kB // MemFree: 2169240 kB // Buffers: 33776 kB // Cached: 1618328 kB Fs::aptOpen(proc_meminfo, proc_meminfo_filename); while (!proc_meminfo.eof()) { getline(proc_meminfo,line); //cout<<"L: "<> key; line_stm >> val; line_stm >> units; // Handle the linux-2.4 case. 
if (key=="total:") { std::string ignore; std::string line_mem; getline(proc_meminfo,line_mem); std::istringstream line_stm_24_mem(line_mem); line_stm_24_mem >> key; line_stm_24_mem >> total; // total: line_stm_24_mem >> ignore; // used: line_stm_24_mem >> free; // free: line_stm_24_mem >> ignore; // shared: line_stm_24_mem >> buffersUsed; // buffers: // std::string line_swap; getline(proc_meminfo,line_swap); std::istringstream line_stm_24_swap(line_swap); line_stm_24_swap >> ignore; // Swap: line_stm_24_swap >> swapAvail; // we dont need any more data break; } // //printf("%-20s=%10lu %4s\n",key.c_str(),val,units.c_str()); // if (units=="kB") { val=val*1024; } else if (units=="MB") { val=val*1024*1024; } else { // just give up if the units arent there. break; // assert(0); } // if (key=="MemTotal:") { total=val; } else if (key=="MemFree:") { free=val; } else if (key=="SwapFree:") { swapAvail=val; } else if (key=="Buffers:") { buffersUsed=val; } else if (key=="Cached:") { cached=val; } } proc_meminfo.close(); memAvail=(uint64_t)(free+0.90*buffersUsed+cached); return true; } #endif #ifdef __APPLE__ #define memInfo_defined 1 /// @brief Query the mach kernel for memInfo. /// @param free The amount of free memory. (bytes) /// @param total The physical memory installed (bytes) /// @param swapAvail Darwin can use all the disk space, return 0 /// @param memAvail Suggested amount of memory to use. 
/// @return true on success bool memInfo_darwin(uint64_t &free, uint64_t &total, uint64_t &swapAvail, uint64_t& memAvail) { // ask mach struct vm_statistics vm_stat; vm_size_t my_pagesize; mach_port_t my_host=mach_host_self(); mach_msg_type_number_t count = HOST_VM_INFO_COUNT; if (host_page_size(my_host,&my_pagesize)!=KERN_SUCCESS) { Err::errAbort("Util::memInfo() - Didnt succeed with 'host_page_size'."); } if (host_statistics(my_host, HOST_VM_INFO,(integer_t*)&vm_stat,&count) != KERN_SUCCESS) { Err::errAbort("Util::memInfo() - Didnt succeed with 'host_statistics'."); } // there isnt a total in the struct; sum it up. total=(vm_stat.free_count+vm_stat.active_count+vm_stat.inactive_count+vm_stat.wire_count); // THEN multiply by the page size. (a 64b multiply) total=total*my_pagesize; //#define PRINT_IT(x) printf("%-30s: %8d\n",#x,x) // PRINT_IT(vm_stat.free_count); // PRINT_IT(vm_stat.active_count); // PRINT_IT(vm_stat.inactive_count); // PRINT_IT(vm_stat.wire_count); // should we add in vm_stat.inactive_count? free=vm_stat.free_count; free=free*my_pagesize; // darwin can use all the disk space -- report zero swapAvail=0; // our guess as to what we can allocate memAvail=(uint64_t)(0.90*(vm_stat.free_count+vm_stat.inactive_count)*my_pagesize); return true; } #endif #ifdef __sun__ #define memInfo_defined 1 /// @brief Query a solaris kernel for memInfo /// @param free The amount of free memory. (bytes) /// @param total The physical memory installed (bytes) /// @param swapAvail The amount of swap avail. (always zero.) /// @param memAvail Suggested amount of memory to use. /// @return true on success bool memInfo_solaris(uint64_t &free, uint64_t &total, uint64_t &swapAvail, uint64_t& memAvail) { uint64_t page_size=sysconf(_SC_PAGESIZE); free =sysconf(_SC_AVPHYS_PAGES)*page_size; total =sysconf(_SC_PHYS_PAGES )*page_size; // figure we can use a quarter of the allocated memory memAvail=free+((total-free)/4); // dont want to call swapctl... 
swapAvail=0; return true; } #endif #ifdef __which_system_was_this_for__ #define memInfo_defined 1 /// @brief Use linux sysinfo to get linux memInfo. (the old interface) /// @param free The amount of free memory. (bytes) /// @param total The physical memory installed (bytes) /// @param swapAvail The amount of swap avail. (bytes) /// @param memAvail Suggested amount of memory to use. /// @return true on success bool memInfo_sysinfo(uint64_t &free, uint64_t &total, uint64_t &swapAvail, uint64_t& memAvail) { struct sysinfo info; if(sysinfo(&info) != 0) { Err::errAbort("Util::memInfo() - Could not determine memory usage with: sysinfo()."); } free = info.freeram * info.mem_unit; total = info.totalram * info.mem_unit; swapAvail = info.freeswap * info.mem_unit; // We use this as a guesstimate of how much ram we can safely allocate on the system. // The factor of 0.90 is to leave a bit of memory for other users. memAvail=(uint64_t)(free+0.90*(info.bufferram*info.mem_unit)); return true; } #endif bool Util::memInfo(uint64_t &free, uint64_t &total, uint64_t &swapAvail, uint64_t& memAvail, bool cap32bit) { bool success = false; bool is32bit = true; free=total=swapAvail=memAvail=0; // One of these should be defined. ///@todo is there a more robust way to determine 32 vs 64 bitness? 
#ifdef WIN32 #ifdef _WIN64 is32bit = false; #else is32bit = true; #endif success=memInfo_win32(free,total,swapAvail,memAvail); #endif //WIN32 #ifdef __linux__ #ifdef __LP64__ is32bit = false; #else is32bit = true; #endif success=memInfo_linux("/proc/meminfo",free,total,swapAvail,memAvail); #endif //__linux__ #ifdef __APPLE__ #ifdef __LP64__ is32bit = false; #else is32bit = true; #endif success=memInfo_darwin(free,total,swapAvail,memAvail); #endif //__APPLE__ #ifdef __sun__ success=memInfo_solaris(free,total,swapAvail,memAvail); #endif // __sun__ if (memAvail>MEMINFO_2GB_MAX && (is32bit || cap32bit)) { memAvail=MEMINFO_2GB_MAX; } return success; } /** * Return a pointer to the next character that is white space * or NULL if none found. * @param s - cstring to find white space in. * @return - Pointer to next whitespace character or NULL if none * found. */ const char *Util::nextWhiteSpace(const char *s) { while(s[0] != '\0' && !isspace(s[0])) { s++; } return s; } /** * Print a string wrapping at max width from the current * position. * @param out - stream to output string to. * @param str - The cstring to be printed. * @param prefix - How many spaces to put on begining of newline. * @param maxWidth - Where to wrap text at. * @param currentPos - What position in the line is * cursor currently at. */ void Util::printStringWidth(std::ostream &out,const std::string& str, int prefix, int currentPos, int maxWidth ) { const char *wStart = NULL, *wEnd = NULL; /* Start and end of word pointers. */ int position = currentPos; int nextSize = 0; int i = 0; wStart = str.c_str(); /* While there are still characters to be printed. */ while(*wStart != '\0') { /* Clean out any whitespace. */ while(isspace(*wStart) && *wStart != '\0') { if(*wStart == '\n') { out.put('\n'); for(i = 0; i < prefix; i++) out.put(' '); fflush(stdout); position = prefix; } // WAS '*wStart++' but that is unnecessary and gives compile // warning on "expression result unused". 
wStart++; } if(*wStart == '\0') break; /* Find the end of current word. */ wEnd = wStart; while(!isspace(*wEnd) && *wEnd != '\0') wEnd++; /* Time for a newline? */ if((wEnd - wStart) + position >= maxWidth) { out.put('\n'); for(i = 0; i < prefix; i++) out.put(' '); position = prefix; } /* Print out the word. */ while(wStart < wEnd) { out.put(*wStart); fflush(stdout); wStart++; position++; } /* Look to see where next word is. */ while(isspace(*wEnd)) { if(*wEnd == '\n') { out.put('\n'); for(i = 0; i < prefix; i++) out.put(' '); position = prefix; } wEnd++; } /* Figure out the size of the next word. */ wStart = nextWhiteSpace(wEnd); if(wStart != NULL) nextSize = wStart - wEnd; else nextSize = 0; /* Print a space if we're not going to print a newline. */ if(*wEnd != '\0' && nextSize + position < maxWidth && position != 0) { out.put(' '); position++; } wStart = wEnd; } } /** * Wrapper for different version of isnan() on different systems. * @param x - number to be checked for NaN or INF * @return - true if x is finite (-INF < x && x < +INF && x != nan), false otherwise */ bool Util::isFinite(double x) { bool isOk = false; #ifdef WIN32 isOk = _finite(x); #else isOk = isfinite(x); #endif return isOk; } std::string Util::getTimeStamp() { time_t now=time(NULL); const char* ctime_ptr=ctime(&now); APT_ERR_ASSERT(ctime_ptr != NULL, "Got null value from ctime()"); std::string ctime_str=ctime_ptr; trimString(ctime_str); return ctime_str; } void Util::changeEnd(std::string& str,const std::string& from,const std::string& to) { size_t pos=str.rfind(from); if (pos!=string::npos) { str=str.substr(0,pos)+to; } } void Util::changeEnd(std::vector& str_vec,const std::string& from,const std::string& to) { for (int i=0;i Util::listToVector(const char* in[]) { std::vector fullName; for (int i = 0; in[i] != NULL; i++) { fullName.push_back(in[i]); } return fullName; } std::vector Util::listToVector(const char* in[], int size) { std::vector fullName; for (int i = 0; i < size; i++) { 
fullName.push_back(in[i]); } return fullName; } std::vector Util::addPrefixSuffix(std::vector middle, const std::string &prefix, const std::string &suffix) { std::vector fullName; for (std::vector::iterator i = middle.begin(); i < middle.end();i++) { fullName.push_back(Fs::Unc(prefix + (*i) + suffix)); } return fullName; } std::vector Util::addPrefixSuffix(const char* middle[], const std::string &prefix, const std::string &suffix) { std::vector fullName = addPrefixSuffix(listToVector(middle), prefix, suffix); return fullName; } std::vector Util::addPrefixSuffix(const char* middle[], int size, const std::string &prefix, const std::string &suffix) { std::vector fullName = addPrefixSuffix(listToVector(middle, size), prefix, suffix); return fullName; } std::vector Util::addPrefixSuffix(const char* middle[], const std::string &prefix) { std::vector fullName = addPrefixSuffix(listToVector(middle), prefix, ""); return fullName; } //String Stuff //Assumes a sentintel exists (equal to "") as the last element in the array. 
std::vector Util::listToVector(std::string in[]) { std::vector fullName; for (int i = 0; in[i] != ""; i++) { fullName.push_back(in[i]); } return fullName; } std::vector Util::listToVector(std::string in[], int size) { std::vector fullName; for(int i = 0; i < size; i++) { fullName.push_back(in[i]); } return fullName; } std::vector Util::addPrefixSuffix(std::string middle[], const std::string &prefix, const std::string &suffix) { std::vector fullName = addPrefixSuffix(listToVector(middle), prefix, suffix); return fullName; } std::vector Util::addPrefixSuffix(std::string middle[], int size, const std::string &prefix, const std::string &suffix) { std::vector fullName = addPrefixSuffix(listToVector(middle,size), prefix, suffix); return fullName; } std::string Util::joinVectorString(std::vector toJoin, const std::string &sep) { std::string fullString=""; for (std::vector::iterator i = toJoin.begin(); i < toJoin.end();i++) { fullString += (*i)+sep; } return fullString; } Util::StaticMem &Util::getStaticMem() { static Util::StaticMem mem; return mem; } uint64_t Util::getMemFreeAtStart() { Util::StaticMem &mem = getStaticMem(); return mem.getMemFreeAtStart(); } uint64_t Util::getMemFreeAtBlock() { Util::StaticMem &mem = getStaticMem(); return mem.getMemFreeAtBlock(); } void Util::popMemFreeAtStart() { Util::StaticMem &mem = getStaticMem(); mem.popMemFreeAtStart(); } void Util::pushMemFreeAtStart() { Util::StaticMem &mem = getStaticMem(); uint64_t freeRam = 0, totalRam = 0, swapAvail = 0, memAvail = 0; Util::memInfo(freeRam, totalRam, swapAvail, memAvail, false); mem.pushMemFreeAtStart(memAvail); } std::string Util::toString( const std::wstring &src ) { char* szSource = new char[ src.length()+1 ]; wcstombs( szSource, src.c_str(), src.length()+1 ); std::string result(szSource); delete [] szSource; return result; } affxparser/src/fusion/util/Util.h0000644000175200017520000004125714516003651020104 
0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Util.h * @author Chuck Sugnet * @date Mon May 16 15:52:31 2005 * * @brief General Utilities. */ #ifndef _UTIL_H_ #define _UTIL_H_ /// uncomment this to remove functions which are //#define APT_OBSOLETE_FUNCS 1 // #include "portability/affy-base-types.h" #include "portability/apt-win-dll.h" #include "util/Convert.h" #include "util/Err.h" // #include #include #include #include #include #include #include #define MEGABYTE 1048576 /// 386 systems cant map more than 2GB of user memory. /// (unless you have a patched kernel...) #ifdef WIN32 #ifdef _WIN64 #define MEMINFO_2GB_MAX (2UL*1024*1024*1024) #else // cap at 1.3G on Windows due to expected memory fragmentation /// @todo This was 1.7G on head before merge with python branch. Further work on genotype engine may allow for bumping this back up some. #define MEMINFO_2GB_MAX floor(1.3f*1024*1024*1024) #endif #else #define MEMINFO_2GB_MAX (2UL*1024*1024*1024) #endif /// Log base two #define log2(x) ( log((x))/log((2.0)) ) /// Square of the two numbers /// (Note: this is bad style due to X being evaled twice.) 
#define sqr(x) ((x)*(x)) /// Returns the number of elements in the array. #define ArraySize(a) (sizeof(a)/sizeof((a)[0])) /** delete function that deletes the pointer and sets it to NULL. */ template inline void Freez(T*& p) { delete p; p = NULL; } /** delete function that deletes an array and sets the pointer to NULL. */ template inline void FreezArray(T*& p) { delete[] p; p = NULL; } /** * @name InstanceOf * * Check at run time if an object is an instance of a class. This differs from * comparing the result of type_id, as it takes inheritance into account. Note * that base class must be polymorphic, that is it must have a virtual function * or inherit from another class. * * @param objPtr A pointer to the object to check. A pointer must be used, not * an object. If necessary, generate an address. * @param The class name to test against. * @return true if the object is an instance of the class. */ #define InstanceOf(objPtr, className) \ ((bool)(dynamic_cast(objPtr) != NULL)) /** * Util * @brief Utility functions for memory, etc. */ class APTLIB_API Util { public: /** * Make the string all lowercase. * @param s - string to be modified */ static void downcaseString_inplace(std::string& str) { for(unsigned int i = 0; i < str.size(); i++) { str[i] = (char)tolower(str[i]); } } /// @brief Copy and downcase a 8bit string. /// @param str_orig /// @return static std::string downcaseString(const std::string& str_orig) { std::string str=str_orig; downcaseString_inplace(str); return str; } /** * Make the string all uppercase. * @param s - string to be modified */ static void upcaseString_inplace(std::string& str) { for(unsigned int i = 0; i < str.size(); i++) { str[i] = (char)toupper(str[i]); } } /// @brief Copy and upcase a 8bit string. 
/// @param str_orig /// @return static std::string upcaseString(const std::string& str_orig) { std::string str=str_orig; upcaseString_inplace(str); return str; } /** Comparison object for use in map for char * */ struct ltstr { /// Is one string less than another? bool operator()(const char* s1, const char* s2) const { return strcmp(s1, s2) < 0; } }; struct ltstring { /// Is one string less than another? bool operator()(const std::string& s1, const std::string& s2) const { return strcmp(s1.c_str(), s2.c_str()) < 0; //return s1.compare(s2)<0; //vliber } }; /** * Get a timestamp string. * @return - pointer to statically allocated buffer with time. */ static std::string getTimeStamp(); /** * Some older compilers (solaris) don't have a round function. */ static int round(double x) { return ((int)((x)+0.5)); } /** * Create a copy of a string. Free this with delete [] (or freezArray()) when * done. * @param s - c-string to be copied. * @return char * newly allocated c-string. */ static char *cloneString(const char *); // one shouldnt need this. -jhg // static char *cloneString(const std::string &); /** * @brief Does the string end with the other string? * @param str string to check * @param endstr ending string. * @return true if str ends with endstr. */ static bool stringEndsWith(const std::string& str,const std::string& end); // like AffxString but uses std::string. /// @brief Check to see if STR ends with ENDING. /// @param str /// @param ending /// @return true if it does. static bool endsWithStr(const std::string& str,const std::string& ending); /// @brief Checks to see if STR ends with ENDING ignoring the last POSFROMEND chars. /// Basicly the POSFROMEND acts as a wildcard "." at the end of the string. 
/// @param     str         string to check
  /// @param     ending      suffix to look for
  /// @param     posFromEnd  number of trailing characters of str to ignore
  /// @return    true if it does
  static bool endsWithStr(const std::string& str,const std::string& ending,int posFromEnd);

  //#ifndef APT_OBSOLETE_FUNCS
  /**
   * @brief Chop off the last character if it is a path separator.
   * windows stat() can't handle having it there.
   * @param s - string to have '/' or '\' chopped off if last.
   */
  static void chompLastIfSep(std::string &s);

  /**
   * Chop the last suffix (as defined by '.') from a string
   * @param s - string to chop
   * @param d - delimiter, default '.'
   */
  static std::string chopSuffix(const std::string& s, char d = '.');

  /**
   * Chop up a string into a vector of words.
   *
   * @param s - string of interest.
   * @param delim - delimiter to split on.
   * @param words - vector to put words into, will be cleared then filled.
   */
  // NOTE(review): the vector element type was lost in extraction; restored as
  // std::string ("vector of words"). Confirm against the upstream header.
  static void chopString(const std::string& s,const char delim,std::vector<std::string>& words);

  /**
   * Chop up a string into a vector of words.
   *
   * @param s - string of interest.
   * @param delims - delimiters to split on. The split will occur at any character
   *               - among those present in the string.
   * @param words - vector to put words into, will be cleared then filled.
   */
  static void chopString(const std::string& s, const char* delims, std::vector<std::string>& words);

  /**
   * @brief Cut off any preceding and trailing white space.
   * @param s - String to be trimmed (modified in place).
   * @param whitespace - String with characters to be trimmed.
   */
  static void trimString(std::string& s, const char *whitespace = " \r\n\t") {
    // Trailing side first. If s holds only whitespace, find_last_not_of()
    // returns npos and npos+1 wraps to 0, so the whole string is erased.
    s.erase(s.find_last_not_of(whitespace)+1);
    // Leading side. On an empty string this is erase(0, npos), a no-op.
    s.erase(0,s.find_first_not_of(whitespace));
  }

  /// @brief     Change the end of a string
  /// @param     str       string to change
  /// @param     from      string to change from
  /// @param     to        string to change to
  static void changeEnd(std::string& str,const std::string& from,const std::string& to);

  /// @brief     Change the endings of a vector of strings.
/// @param str_vec vector to change /// @param from string to change from /// @param to string to change to static void changeEnd(std::vector& str_vec,const std::string& from,const std::string& to); /** * @brief Check to see if two strings are the same. * * @param s1 - string 1. * @param s2 - string 2. * * @return true if compare() considers them the same. */ static bool sameString(const std::string &s1, const std::string &s2) { return s1.compare(s2) == 0; } /** * Check each entry in two matrices to see if they are the same. If * doing 'match-rows' we will attempt to find the matching row by the * row name. * * @param targetFile - File to read target matrix from. * @param queryFile - File to read the query matrix from. * @param colSkip - How many of initial columns to ignore (i.e. row names) * @param rowSkip - How many of initial rows to ignore (i.e. column header) * @param epsilon - What is the tolerance of difference. * i.e. if q[i][j] - t[i][j] >= epsilon then there is a difference. * @param printMismatch - Should we print out the cases where difference is >= epsilon * @param matchRows - Should we try to match the rows based on the row identifiers. * @param fraction - Maximum fractional difference considered equivalent (default: not used). * @param printMismatchMax - Maximum number of mismatches to print (default: no limit). * * @return - Number of differences >= epsilon found. */ static int matrixDifferences(const std::string& targetFile, const std::string& queryFile, int colSkip, int rowSkip, double epsilon, bool printMismatch, bool matchRows, double fraction = 0.0, int printMismatchMax = -1); /** * Replace the from character with the to character for * all instances in string supplied. * * @param s - string to do the replacement in. * @param from - original character to be replaced. * @param to - character to do the replacing with. 
*/ static void subChar(std::string &s, char from, char to) { std::string::size_type pos = 0; for(pos = 0; pos < s.size(); pos++) { if(s[pos] == from) { s[pos] = to; } } } /** * Replace the from string with the to string for * all instances in string supplied. * * @param s - string to do the replacement in. * @param from - original string to be replaced. * @param to - string to do the replacing with. */ static void replaceString(std::string &s, const std::string &from, const std::string &to); static void breakByString(const std::string &s, const std::string &delim, std::vector &words); static void removeChar(std::string &s, char character) { std::ostringstream buf; for (size_t i = 0; i < s.length(); i++) { if (s.at(i) != character) { buf << s.at(i); } } s = buf.str(); } static bool is32Bit() { bool is32 = true; int nBytes = sizeof(size_t); if (nBytes == 4) { is32 = true; } else if (nBytes == 8) { is32 = false; } #ifdef _WIN64 is32 = false; #endif #ifdef __LP64__ is32 = false; #endif return is32; } /** * Schrage's algorithm for generating random numbers in 32 bits. * @param ix - pointer to integer seed, cannot be zero. */ static int32_t schrageRandom(int32_t *ix); static std::string asMB(uint64_t x); /** * Determine the free and total amount of memory in bytes on this machine. * * @param free - Bytes available currently. * @param total - Total bytes installed on machine. * @param swapAvail - Amount of swap available on machine. OSX can use all disk space so reports 0 on OSX * @param memAvail - Amount of space we should consider available. * @param cap32bit - Cap the memory at 4GB. * * @return true if successful, false otherwise. 
*/
  static bool memInfo(uint64_t &free, uint64_t &total, uint64_t &swapAvail,uint64_t& memAvail, bool cap32bit=true);

  // Accessors for the stack of "memory available" values kept in the private
  // StaticMem helper.
  // NOTE(review): only the declarations are visible here; presumably these
  // forward to the StaticMem methods of the same names - confirm in Util.cpp.
  static uint64_t getMemFreeAtStart();
  static uint64_t getMemFreeAtBlock();
  static void pushMemFreeAtStart();
  static void popMemFreeAtStart();

  /**
   * Return a pointer to the next character that is white space
   * or NULL if none found.
   * @param s - cstring to find white space in.
   * @return - Pointer to next whitespace character or NULL if none
   * found.
   */
  static const char *nextWhiteSpace(const char *);

  /**
   * Print a string wrapping at max width from the current
   * position.
   * @param out - stream to output string to.
   * @param str - The string to be printed.
   * @param prefix - How many spaces to put on beginning of newline.
   * @param currentPos - What position in the line is
   * cursor currently at.
   * @param maxWidth - Where to wrap text at (default 70).
   */
  static void printStringWidth(std::ostream &out,const std::string& str, int prefix,int currentPos, int maxWidth=70);

  /**
   * Wrapper for different version of isnan() on different systems.
* @param x - number to be checked for NaN or INF * @return - true if x is finite (-INF < x && x < +INF && x != nan), false otherwise */ static bool isFinite(double x); inline static void PrintTextClassTitle(const std::string &className){ printf ("****%s****\n",className.c_str()); } static void PrintTextFunctionTitle(const std::string &className, const std::string &functionName){ printf ("****%s::%s****\n",className.c_str(),functionName.c_str()); } static std::vector addPrefixSuffix(std::vector middle, const std::string &prefix, const std::string &suffix); static std::vector addPrefixSuffix(const char* middle[], const std::string &prefix, const std::string &suffix); static std::vector addPrefixSuffix(const char* middle[], int size, const std::string &prefix, const std::string &suffix); static std::vector listToVector(const char* in[], int size); static std::vector listToVector(const char* in[]); static std::vector addPrefixSuffix(const char* middle[], const std::string &prefix); static std::vector addPrefixSuffix(std::string middle[], const std::string &prefix, const std::string &suffix); static std::vector addPrefixSuffix(std::string middle[], int size, const std::string &prefix, const std::string &suffix); static std::vector listToVector(std::string in[]); static std::vector listToVector(std::string in[], int size); static std::string joinVectorString(std::vector toJoin, const std::string &sep); static std::string escapeString(const std::string &s, char c, char escape='\\') { std::string e; e.reserve(s.size()); for (size_t i = 0; i < s.size(); i++) { if (s[i] == c || s[i] == escape) { e.push_back(escape); } e.push_back(s[i]); } return e; } static std::string deEscapeString(std::string &s, char escape='\\') { std::string e; e.reserve(s.size()); for (size_t i = 0; i < s.size(); i++) { if (s[i] == escape) { i++; } e.push_back(s[i]); } return e; } static std::wstring toWString( const std::string &src ) { std::wstring dst( src.length(), L' '); std::copy( src.begin(), 
src.end(), dst.begin()); return dst; } static std::string toString( const std::wstring &src ); private: class StaticMem { public: StaticMem() { uint64_t freeRam = 0, totalRam = 0, swapAvail = 0, memAvail = 0; Util::memInfo(freeRam, totalRam, swapAvail, memAvail, false); m_MemFreeAtStart.push_back(memAvail); } uint64_t getMemFreeAtStart() { return m_MemFreeAtStart[0]; } uint64_t getMemFreeAtBlock() { return m_MemFreeAtStart[m_MemFreeAtStart.size()-1]; } void pushMemFreeAtStart(uint64_t mem) { m_MemFreeAtStart.push_back(mem); } uint64_t popMemFreeAtStart() { uint64_t mem = m_MemFreeAtStart[m_MemFreeAtStart.size()-1]; m_MemFreeAtStart.pop_back(); return mem; } private: std::vector m_MemFreeAtStart; }; static StaticMem &getStaticMem(); }; #ifdef __linux__ bool memInfo_linux(std::string proc_meminfo_filename, uint64_t &free, uint64_t &total, uint64_t &swapAvail, uint64_t& memAvail); #endif #endif /* _UTIL_H */ affxparser/src/fusion/util/Verbose.cpp0000644000175200017520000002154214516003651021122 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Verbose.cpp * @author Chuck Sugnet * @date Fri Oct 21 17:06:17 2005 * * @brief Class for doing logging and some command line ui. */ // #include "calvin_files/utils/src/StringUtils.h" #include "portability/affy-system-api.h" #include "util/Fs.h" #include "util/Util.h" #include "util/Verbose.h" // #include #include using namespace std; using namespace affymetrix_calvin_utilities; /** * @brief This function will create log file for test * purposes. * The path and name of the log file needs to be specified * prior to use (see fname). */ /// a static pointer to the fstream which will be used for "emergency debugging output". std::fstream* em_out_fstream; void em_out(const std::string& msg) { // if the stream isnt setup, then open it. if (em_out_fstream==NULL) { em_out_fstream=new std::fstream(); std::string fname="EM-OUT-"+ToStr((int)getpid())+"-"+ToStr((int)rand())+".log"; // PATCH: open() may not exist for wstrings, but does for strings. // To avoid compilation errors in Windows, we simply skip the // StringUtils::ConvertMBSToWCS() call. Hopefully ok. If not, // this "emergency debugging output" function shouldn't be called // anyways. /HB 2012-08-29 #ifdef _MSC_VER_HIDE //HB std::wstring wfname=StringUtils::ConvertMBSToWCS(fname); em_out_fstream->open(wfname.c_str(),ios::out); #else em_out_fstream->open(fname.c_str(),ios::out); #endif } // now send the message... *em_out_fstream << msg << "\n"; // ...and be sure the output goes to the file right away, as // this function is used when we have a hard to debug crash. em_out_fstream->flush(); } /** * @brief Print a dot out to let the user know we are still alive * and making progress. 
* @param verbosity - What level of verbosity this message should be printed at. */ void Verbose::progressStep(int verbosity) { Param &p = getParam(); assert(p.m_DotCount.size() > 0); assert(p.m_DotMod.size() > 0); if(verbosity <= p.m_Verbosity) { p.m_DotCount.back()--; /* Check to see if it is time to call the step report function yet. */ for(unsigned int i = 0; i < p.m_ProHandler.size(); i++) { if(p.m_DotCount.back() <= 0 || p.m_ProHandler[i]->handleAll()) { p.m_ProHandler[i]->progressStep(verbosity); } } if(p.m_DotCount.back() <= 0) { p.m_DotCount.back() = p.m_DotMod.back(); // reset the counter } } } /** * @brief This function gets around the problem of static variable * initialization as local static variables work more consistently. * @return Param - Our static parameters for this class. */ Verbose::Param &Verbose::getParam() { // Avoid weird windows .NET/Forms bug where linking to cout/cerr can cause problems... static std::ostream *out = NULL; if (std::cerr.good()) { out = &std::cerr; } #ifdef _NO_STD_OUT static ProgressDot progHandler(0, NULL); static MsgStream msgHandler(0, NULL); #else // Normal console mode is ok. static ProgressDot progHandler(1, out); static MsgStream msgHandler(1, out); #endif // By default we just use a normal message handler as the warning handler. static Verbose::Param m_Param(&progHandler, &msgHandler, &msgHandler); return m_Param; } /// @brief Functions to add and remove handlers for communcation functions. void Verbose::pushProgressHandler(ProgressHandler *handler) { getParam().m_ProHandler.push_back(handler); } void Verbose::popProgressHandler() { getParam().m_ProHandler.pop_back(); } void Verbose::pushMsgHandler(MsgHandler *handler) { getParam().m_MsgHandler.push_back(handler); // pushWarnHandler(handler); // @todo - Make this simpler. Too many push/pop calls currently } void Verbose::popMsgHandler() { getParam().m_MsgHandler.pop_back(); // popWarnHandler(); // @todo - Make this simpler. 
Too many push/pop calls currently } void Verbose::pushWarnHandler(MsgHandler *handler) { getParam().m_WarnHandler.push_back(handler); } void Verbose::popWarnHandler() { getParam().m_WarnHandler.pop_back(); } void Verbose::progressBegin(int verbosity, const std::string &msg, int total, int dotMod, int maxCalls) { std::vector &m_Handle = getParam().m_ProHandler; getParam().m_DotMod.push_back(dotMod); getParam().m_DotCount.push_back(0); // if we have handlers let them know we're beginning. for(unsigned int i = 0; i < m_Handle.size(); i++) { ProgressHandler *handle = m_Handle[i]; if(handle->handleAll()) handle->progressBegin(verbosity, msg, maxCalls); else handle->progressBegin(verbosity, msg, total); } } void Verbose::progressEnd(int verbosity, const std::string &msg) { Verbose::Param &p = getParam(); assert(p.m_DotCount.size() > 0); assert(p.m_DotMod.size() > 0); p.m_DotMod.pop_back(); p.m_DotCount.pop_back(); if(verbosity <= p.m_Verbosity) { for(unsigned int i = 0; i < p.m_ProHandler.size(); i++) { p.m_ProHandler[i]->progressEnd(verbosity, msg); } } } void Verbose::removeMsgHandler(MsgHandler *h) { Verbose::Param &p = getParam(); removeMsgHandler(p.m_MsgHandler, h); removeMsgHandler(p.m_WarnHandler, h); } void Verbose::removeProgressHandler(ProgressHandler *h) { Verbose::Param &p = getParam(); removeProgressHandler(p.m_ProHandler, h); } void Verbose::removeMsgHandler(std::vector &vec, MsgHandler *h) { vector::iterator i; for (i = vec.begin(); i < vec.end(); i++) { if (*i == h) { vec.erase(i); break; } } } void Verbose::removeProgressHandler(std::vector &vec, ProgressHandler *h) { vector::iterator i; for (i = vec.begin(); i < vec.end(); i++) { if (*i == h) { vec.erase(i); break; } } } void Verbose::removeDefault() { Verbose::Param &p = getParam(); if (!p.m_ProHandler.empty() && p.m_ProHandler[0] == p.m_ProgDefault) { p.m_ProHandler.erase(p.m_ProHandler.begin()); p.m_ProgDefault = NULL; } if (!p.m_MsgHandler.empty() && p.m_MsgHandler[0] == p.m_MsgDefault) { 
p.m_MsgHandler.erase(p.m_MsgHandler.begin()); p.m_MsgDefault = NULL; } if (!p.m_WarnHandler.empty() && p.m_WarnHandler[0] == p.m_WarnDefault) { p.m_WarnHandler.erase(p.m_WarnHandler.begin()); } } /** * @brief Set whether or not output messages are logged * useful to turn off output when catching expected errors * * @param output - true or false */ void Verbose::setOutput(bool output) { Verbose::Param &p = getParam(); p.m_Output = output; } /** * @brief Set the level of verbosity desired. 0 == no messages * 1 == normal messages, 2,3,4, etc. == more verbose. * @param level - level of verbosity desired. */ void Verbose::setLevel(int level) { Verbose::Param &p = getParam(); p.m_Verbosity = level; for(unsigned int i = 0; i < p.m_ProHandler.size(); i++) { p.m_ProHandler[i]->setBaseVerbosity(level); } for(unsigned int i = 0; i < p.m_MsgHandler.size(); i++) { p.m_MsgHandler[i]->setBaseVerbosity(level); } } /** * @brief Print a message to the stream. * @param level - What level of verbosity this message should be printed at. * @param s - Message to be printed. * @param nl - Should a newline be appended to message? */ void Verbose::out(int level, const std::string &s, bool nl) { Verbose::Param &p = getParam(); if(p.m_Output) { for(unsigned int i = 0; i < p.m_MsgHandler.size(); i++) { p.m_MsgHandler[i]->message(level, s, nl); } } // this forces our messages out to the OS, so we know what is going on. // If someone (errabort) calls exit, the messages might be left in our buffers and not written out. fflush(NULL); } /** * @brief Print a warning message. * @param level - What level of verbosity this message should be printed at. * @param s - Message to be printed. * @param nl - Should a newline be appended to message? 
*/ void Verbose::warn(int level, const std::string &s, bool nl, const std::string prefix) { Verbose::Param &p = getParam(); if(p.m_Output) { for(unsigned int i = 0; i < p.m_WarnHandler.size(); i++) { p.m_WarnHandler[i]->message(level, prefix + s, nl); } } } affxparser/src/fusion/util/Verbose.h0000644000175200017520000001160414516003651020565 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file Verbose.h * @author Chuck Sugnet * @date Fri Oct 21 16:54:00 2005 * * @brief Class for doing logging and some command line ui. */ #ifndef _VERBOSE_H #define _VERBOSE_H // #include "portability/apt-win-dll.h" #include "util/MsgHandler.h" #include "util/MsgStream.h" #include "util/ProgressDot.h" #include "util/ProgressHandler.h" // #include #include #include #include #include #include void em_out(const std::string& msg); /** * Verbose * @brief Class for doing logging and some command line ui. */ class APTLIB_API Verbose { public: /** * Utility class for remembering our parameters of interest. 
*/ class Param { public: Param(ProgressHandler *progHandler, MsgHandler *msgHandler, MsgHandler *warnHandler) { m_Verbosity = 1; m_NewLine = true; m_Output = true; m_ProHandler.push_back(progHandler); m_MsgHandler.push_back(msgHandler); m_WarnHandler.push_back(warnHandler); m_ProgDefault = progHandler; m_MsgDefault = msgHandler; m_WarnDefault = warnHandler; } ProgressHandler *m_ProgDefault; MsgHandler *m_MsgDefault; MsgHandler *m_WarnDefault; std::vector m_ProHandler; ///< Vector of handlers to be called with progress reports. std::vector m_MsgHandler; ///< Vector of handlers for messages (i.e. log files, consoles, dialogs). std::vector m_WarnHandler; ///< Vector of handlers for warnings bool m_NewLine; ///< Was a new line outputted after last message? bool m_Output; ///< Do we report messages int m_Verbosity; ///< What level of messages is wanted, larger num == more msgs std::vector m_DotMod; ///< How often do we print a dot when dot() is called? std::vector m_DotCount; ///< How many times has dot() been called? }; /** * @brief This function gets around the problem of static variable * initialization as local static variables work more consistently. * @return Param - Our static parameters for this class. */ static Param &getParam(); /// @brief Functions to add and remove handlers for communcation functions. 
static void pushProgressHandler(ProgressHandler *handler); static void popProgressHandler(); static void removeProgressHandler(ProgressHandler *h); static void removeProgressHandler(std::vector &vec, ProgressHandler *h); static void pushMsgHandler(MsgHandler *handler); static void popMsgHandler(); static void removeMsgHandler(MsgHandler *handler); static void removeMsgHandler(std::vector &vec, MsgHandler *h); static void pushWarnHandler(MsgHandler *handler); static void popWarnHandler(); static void progressBegin(int verbosity, const std::string &msg, int total, int dotMod, int maxCalls); static void progressStep(int verbosity); static void progressEnd(int verbosity, const std::string &msg); static void removeDefault(); /** * @brief Set whether or not output messages are logged * useful to turn off output when catching expected errors * * @param output - true or false */ static void setOutput(bool output); /** * @brief Set the level of verbosity desired. 0 == no messages * 1 == normal messages, 2,3,4, etc. == more verbose. * @param level - level of verbosity desired. */ static void setLevel(int level); /** * @brief Print a message to the stream. * @param level - What level of verbosity this message should be printed at. * @param s - Message to be printed. * @param nl - Should a newline be appended to message? */ static void out(int level, const std::string &s, bool nl = true); /** * @brief Print a warning message. * @param level - What level of verbosity this message should be printed at. * @param s - Message to be printed. * @param nl - Should a newline be appended to message? */ static void warn(int level, const std::string &s, bool nl = true, const std::string prefix = "\nWARNING: "); }; #endif /* _VERBOSE_H */ affxparser/src/fusion/util/VerboseErrHandler.h0000644000175200017520000000620114516003651022531 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. 
// // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /** * @file VerboseErrHandler.h * @author Chuck Sugnet * @date Mon Jun 26 12:26:18 PDT 2006 * * @brief Basic error handler that uses Verbose::warn() and dies by either throwing an * exception or calling abort() */ #ifndef VERBOSEERRHANDLER_H #define VERBOSEERRHANDLER_H #include "util/ErrHandler.h" #include "util/Except.h" #include "util/Verbose.h" // #include #include // /** * @brief Basic error handler that prints to cerr and dies by either throwing an * exception or calling abort() */ class VerboseErrHandler : public ErrHandler { public: VerboseErrHandler(bool doThrow=false, bool verbose=true, bool exitOnError=true, int exitOnErrorValue=-1) { m_Throw = doThrow; m_Verbose = verbose; m_ExitOnError = exitOnError; m_ExitOnErrorValue = exitOnErrorValue; } /** Virtual destructor for a virtual class. */ virtual ~VerboseErrHandler() {} /** * When something terminal happens this function is called. This is * the place to do any last minute cleanup, log writing, and * communication with the user about what went wrong. The * implementation of this function should either exit the program or * throw an exception. * * @param msg - Message about what went wrong. 
*/ virtual void handleError(const std::string &msg) { if(m_Verbose){ Verbose::out(1, msg); } if (m_ExitOnError) { #ifdef _WIN32 // windows needs time for the other process to run, yeild to it. Sleep(500); #endif exit(m_ExitOnErrorValue); } if (m_Throw) { throw Except(msg); } // We shouldnt hit here; We should be exiting or rethrowing the error. // If we do, then something is wrong. // abort(1); } /** Will this error handler be throwing exceptions? */ virtual bool getThrows() { return m_Throw; } /** Will this error handler be throwing exceptions? */ virtual void setThrows(bool doThrow) { m_Throw= doThrow; } private: bool m_Throw; ///< Determines if we throw an exception or just call abort() bool m_Verbose; ///< Determines if we call Verbose::out bool m_ExitOnError; ///< Determines if overriding default value to return when exiting on error int m_ExitOnErrorValue; ///< Value to set if setting the value to return when exiting on errors }; #endif /* VERBOSEERRHANDLER_H */ affxparser/src/fusion/util/apt-check-calvinchp.cpp0000644000175200017520000001245214516003651023321 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "calvin_files/data/src/CHPData.h" #include "calvin_files/data/src/CHPMultiDataData.h" #include "calvin_files/parsers/src/CHPFileReader.h" #include "calvin_files/parsers/src/CHPMultiDataFileReader.h" #include "calvin_files/parsers/src/CHPQuantificationFileReader.h" #include "calvin_files/utils/src/StringUtils.h" #include "portability/affy-base-types.h" #include "util/AffxByteArray.h" #include "util/CalvinChpCheck.h" #include "util/PgOptions.h" #include "util/RegressionCheck.h" #include "util/Util.h" #include "util/Verbose.h" // #include #include #include #include #include #include #include #include #include using namespace affymetrix_calvin_io; using namespace affymetrix_calvin_data; using namespace std; const int passed = 0; const int failed = 1; int main(int argc, char* argv[]) { try { //If no arguments given if(argc == 0) { cout<<"Error: Incorrect number of arguments passed to Calvin Chp Check"<setUsage("\n\n*****apt-check-calvinchpy Help*****"); //Define acceptable Command Line arguments for apt-check-calvinchp opts->defineOption("h", "help", PgOpt::BOOL_OPT, "A bool value, answer true if you want help!", "false"); opts->defineOption("g", "gold", PgOpt::STRING_OPT, "A String representing theGold file to check against - the perfect/reference file", ""); opts->defineOption("n", "gen", PgOpt::STRING_OPT, "A String representing the Generated File to check - the test file", ""); opts->defineOption("d", "diffAllowed", PgOpt::INT_OPT, "An integer representing the number of acceptable mismatches/errors before the check fails", "0"); opts->defineOption("p", "prefix", PgOpt::STRING_OPT, "A string representing hte prefix for the header string alg part", "apt-"); 
opts->defineOption("e", "epsilon", PgOpt::DOUBLE_OPT, "A double that is the maximum accetable difference calculated", "0.0001"); opts->defineOption("b", "bCheckHeaders", PgOpt::BOOL_OPT, "", "true"); opts->defineOption("v", "expectedRetVal", PgOpt::INT_OPT, "An integer value representing the desired return value used to flag a positive or negative test. 0 is positive, 1 is negative", "0"); opts->defineOption("t", "testName", PgOpt::STRING_OPT, "Name of the Test being Run", ""); //Parse argv and store it within PgOptions class variables //Match the command line arguments to the proper options opts->parseArgv(argv); //DEBUG //opts->dump(); if(opts->getBool("help") == true) { opts->usage(); return passed; } std::string gen; std::string gold; if(opts->get("gen") != "") { gen = opts->get("gen"); } else { cout<<"Error: No value for Generated Files (gen) given. Exiting"<get("gold") != "") { gold = opts->get("gold"); } else { cout<<"Error: No value for Gold Files (gold) given. Exiting"<get("prefix"); std::wstring prefix(tempPrefix.length(), L'a'); std::copy(tempPrefix.begin(), tempPrefix.end(), prefix.begin()); //Run Check int diffAllowed = opts->getInt("diffAllowed"); double epsilon = opts->getDouble("epsilon"); bool bCheckHeaders = opts->getBool("bCheckHeaders"); int expectedRetVal = opts->getInt("expectedRetVal"); std::string testName = opts->get("testName"); CalvinChpCheck* cCheck; cCheck = new CalvinChpCheck(gen, gold, diffAllowed, prefix, epsilon, bCheckHeaders); string errorMsg = ""; bool pass = cCheck->check(errorMsg); //Test for Regular Pass (Positive) if(expectedRetVal == 0) { if(!pass) { cout<<"Error in " + testName + "(): " + errorMsg< #include #include #include // using namespace std; const int passed = 0; const int failed = 1; int main(int argc, char* argv[]) { try { //If no arguments given if(argc == 0) { cout<<"Error: Incorrect number of arguments passed to Cel Check"<setUsage("\n\n*****apt-check-cel Help*****"); //Define acceptable Command Line arguments 
for apt-check-cel opts->defineOption("h", "help", PgOpt::BOOL_OPT, "A bool value, answer true if you want help!", "false"); opts->defineOption("n", "gen", PgOpt::STRING_OPT, "A String representing the Generated Files to check - the test file", ""); opts->defineOption("g", "gold", PgOpt::STRING_OPT, "String representing the Gold file to check against - the perfect/reference file", ""); opts->defineOption("p", "prefix", PgOpt::STRING_OPT, "A string representing hte prefix for the header string alg part", ""); opts->defineOption("e", "epsilon", PgOpt::DOUBLE_OPT, "A double that is the maximum accetable difference calculated", "0"); opts->defineOption("d", "diffAllowed", PgOpt::INT_OPT, "An integer representing the number of acceptable mismatches/errors before the check fails", "0"); opts->defineOption("v", "expectedRetVal", PgOpt::INT_OPT, "An integer value representing the desired return value used to flag a positive or negative test. 0 is positive, 1 is negative", "0"); opts->defineOption("t", "testName", PgOpt::STRING_OPT, "Name of the Test being Run", ""); //Parse argv and store it within PgOptions class variables //Match the command line arguments to the proper options opts->parseArgv(argv); if(opts->getBool("help") == true) { opts->usage(); return passed; } std::string gen; std::string gold; if(opts->get("gen") != "") { gen = opts->get("gen"); } else { cout<<"Error: No value for Generated Files (gen) given. Exiting"<get("gold") != "") { gold = opts->get("gold"); } else { cout<<"Error: No value for Gold Files (gold) given. 
Exiting"<getDouble("epsilon"); std::string prefix = opts->get("prefix"); int diffAllowed = opts->getInt("diffAllowed"); int expectedRetVal = opts->getInt("expectedRetVal"); std::string testName = opts->get("testName"); //Run Check CelCheck* cCheck; cCheck = new CelCheck(gen, gold, eps, prefix, diffAllowed); string errorMsg = ""; bool pass = cCheck->check(errorMsg); //Test for Regular Pass (Positive) if(expectedRetVal == 0) { if(!pass) { cout<<"Error in " + testName + "(): " + errorMsg< #include #include #include // using namespace std; const int passed = 0; const int failed = 1; int main(int argc, char* argv[]) { try { //If no arguments given if(argc == 0) { cout<<"Error: Incorrect number of arguments passed to Chp Check"<setUsage("\n\n*****apt-check-chp Help*****"); //Define acceptable Command Line arguments for apt-check-chp opts->defineOption("h", "help", PgOpt::BOOL_OPT, "A bool value, answer true if you want help!", "false"); opts->defineOption("g", "gold", PgOpt::STRING_OPT, "A String representing theGold file to check against - the perfect/reference file", ""); opts->defineOption("n", "gen", PgOpt::STRING_OPT, "A String representing the Generated File to check - the test file", ""); opts->defineOption("e", "epsilon", PgOpt::DOUBLE_OPT, "A double that is the maximum accetable difference calculated", "0.0001"); opts->defineOption("p", "prefix", PgOpt::STRING_OPT, "A string representing hte prefix for the header string alg part", "apt-"); opts->defineOption("d", "diffAllowed", PgOpt::INT_OPT, "An integer representing the number of acceptable mismatches/errors before the check fails", "0"); opts->defineOption("b", "bCheckHeaders", PgOpt::BOOL_OPT, "", "true"); opts->defineOption("v", "expectedRetVal", PgOpt::INT_OPT, "An integer value representing the desired return value used to flag a positive or negative test. 
0 is positive, 1 is negative", "0"); opts->defineOption("t", "testName", PgOpt::STRING_OPT, "testName = Name of the Test being Run", ""); //Parse argv and store it within PgOptions class variables //Match the command line arguments to the proper options opts->parseArgv(argv); if(opts->getBool("help") == true) { opts->usage(); return passed; } std::string gen; std::string gold; if(opts->get("gen") != "") { gen = opts->get("gen"); } else { cout<<"Error: No value for Generated Files (gen) given. Exiting"<get("gold") != "") { gold = opts->get("gold"); } else { cout<<"Error: No value for Gold Files (gold) given. Exiting"<getDouble("epsilon"); int diffAllowed = opts->getInt("diffAllowed"); std::string prefix = opts->get("prefix"); bool bCheckHeaders = opts->getBool("bCheckHeaders"); int expectedRetVal = opts->getInt("expectedRetVal"); std::string testName = opts->get("testName"); //Run Check ChpCheck* cCheck; cCheck = new ChpCheck(gen, gold, diffAllowed, prefix, eps, bCheckHeaders); string errorMsg = ""; bool pass = cCheck->check(errorMsg); //Test for Regular Pass (Positive) if(expectedRetVal == 0) { if(!pass) { cout<<"Error in " + testName + "(): " + errorMsg< #include #include #include // using namespace std; const int passed = 0; const int failed = 1; int main(int argc, char* argv[]) { try { //If no arguments given if(argc == 0) { cout<<"Error: Incorrect number of arguments passed to Matrix Check"<setUsage("\n\n*****apt-check-matrix Help*****"); //Define acceptable Command Line arguments for apt-check-matrix opts->defineOption("h", "help", PgOpt::BOOL_OPT, "A bool value, answer true if you want help!", "false"); opts->defineOption("g", "gold", PgOpt::STRING_OPT, "A String representing theGold file to check against - the perfect/reference file", ""); opts->defineOption("n", "gen", PgOpt::STRING_OPT, "A String representing the Generated File to check - the test file", ""); opts->defineOption("e", "epsilon", PgOpt::DOUBLE_OPT, "A double that is the maximum accetable 
difference calculated", "0"); opts->defineOption("r", "rowSkip", PgOpt::INT_OPT, "An integer that signifies the number of rows to skip before comparing", "0"); opts->defineOption("c", "columnSkip", PgOpt::INT_OPT, "An integer that signifies the number of columns to skip before comparing", "0"); opts->defineOption("m", "matchNames", PgOpt::BOOL_OPT, "A bool that determines whether or not to match/compare results based upon the name in the first column. Useful if the files aren't necessarily in the same order.", "false"); opts->defineOption("a", "allowedMismatch", PgOpt::INT_OPT, "An integer representing the number of acceptable mismatches/errors before the check fails", "0"); opts->defineOption("v", "expectedRetVal", PgOpt::INT_OPT, "An integer value representing the desired return value used to flag a positive or negative test. 0 is positive, 1 is negative", "0"); opts->defineOption("t", "testName", PgOpt::STRING_OPT, "Name of the Test being Run", ""); //Parse argv and store it within PgOptions class variables //Match the command line arguments to the proper options opts->parseArgv(argv); //DEBUG //opts->dump(); if(opts->getBool("help") == true) { opts->usage(); return passed; } std::string gen; std::string gold; if(opts->get("gen") != "") { gen = opts->get("gen"); } else { cout<<"Error: No value for Generated Files (gen) given. Exiting"<get("gold") != "") { gold = opts->get("gold"); } else { cout<<"Error: No value for Gold Files (gold) given. 
Exiting"<getDouble("epsilon"); int rowSkip = opts->getInt("rowSkip"); int colSkip = opts->getInt("columnSkip"); bool matchNames = opts->getBool("matchNames"); int allowedMismatch = opts->getInt("allowedMismatch"); int expectedRetVal = opts->getInt("expectedRetVal"); std::string testName = opts->get("testName"); //Run Check MatrixCheck* mCheck; mCheck = new MatrixCheck(gen, gold, eps, rowSkip, colSkip, matchNames, allowedMismatch); std::string errorMsg = ""; bool pass = mCheck->check(errorMsg); //Test for Regular Pass (Positive) if(expectedRetVal == 0) { if(!pass) { Verbose::out(1, "Error in " + testName + "(): " + errorMsg); Verbose::out(1, "Error: Failed Matrix Check"); return failed; } else { Verbose::out(1, "Passed Matrix Check"); return passed; } } //Test for Negative else { if(pass) { Verbose::out(1, "Error in " + testName + "(): " + errorMsg); Verbose::out(1, "Error: Failed Matrix Check"); return failed; } else { Verbose::out(1, "Passed Matrix Check"); return passed; } } } } catch(...) { Verbose::out(1,"Unexpected Error: uncaught exception."); return failed; } return failed; } affxparser/src/fusion/util/apt-check-mixedfile.cpp0000644000175200017520000001077214516003651023323 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2009 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "util/MixedFileCheck.h" #include "util/PgOptions.h" #include "util/RegressionCheck.h" #include "util/Util.h" #include "util/Verbose.h" // #include #include #include #include // using namespace std; const int passed = 0; const int failed = 1; int main(int argc, char* argv[]) { try{ //If no arguments given if(argc == 0) { cout<<"Error: Incorrect number of arguments passed to Mixed File Check"<setUsage("\n\n*****apt-check-mixedfile Help*****"); //Define acceptable Command Line arguments for apt-check-matrix opts->defineOption("h", "help", PgOpt::BOOL_OPT, "A bool value, answer true if you want help!", "false"); opts->defineOption("g", "gold", PgOpt::STRING_OPT, "A String representing the Gold file to check against - the perfect/reference file", ""); opts->defineOption("n", "gen", PgOpt::STRING_OPT, "A String representing the Generated File to check - the test file", ""); opts->defineOption("e", "epsilon", PgOpt::DOUBLE_OPT, "A double that is the maximum accetable difference calculated", "0"); opts->defineOption("s", "skipLines", PgOpt::INT_OPT, "An integer that signifies the number of lines to skip before comparing", "0"); opts->defineOption("a", "allowedMismatch", PgOpt::INT_OPT, "An integer representing the the threshold for the number of acceptable mismatches/errors before the check fails", "0"); opts->defineOption("v", "expectedRetVal", PgOpt::INT_OPT, "An integer value representing the desired return value used to flag a positive or negative test. 
0 is positive, 1 is negative", "0"); opts->defineOption("t", "testName", PgOpt::STRING_OPT, "Name of the Test being Run", ""); //Parse argv and store it within PgOptions class variables //Match the command line arguments to the proper options opts->parseArgv(argv); if(opts->getBool("help") == true) { opts->usage(); return passed; } std::string gen; std::string gold; if(opts->get("gen") != "") { gen = opts->get("gen"); } else { cout<<"Error: No value for Generated Files (gen) given. Exiting"<get("gold") != "") { gold = opts->get("gold"); } else { cout<<"Error: No value for Gold Files (gold) given. Exiting"<getDouble("epsilon"); int skipLines = opts->getInt("skipLines"); int allowedMismatch = opts->getInt("allowedMismatch"); int expectedRetVal = opts->getInt("expectedRetVal"); std::string testName = opts->get("testName"); //Run Check MixedFileCheck* mfCheck; mfCheck = new MixedFileCheck(gen, gold, eps, skipLines, allowedMismatch); string errorMsg = ""; bool pass = mfCheck->check(errorMsg); //Test for Regular Pass (Positive) if(expectedRetVal == 0) { if(!pass) { cout<<"Error in " + testName + "(): " + errorMsg< #include #include #include using namespace std; const int passed = 0; const int failed = 1; int main(int argc, char* argv[]) { try { //If no arguments given if(argc == 0) { cout<<"Error: Incorrect number of arguments passed to Matrix Check"<setUsage("\n\n*****apt-check-textfile Help*****"); //Define acceptable Command Line arguments for apt-check-textfile opts->defineOption("h", "help", PgOpt::BOOL_OPT, "A bool value, answer true if you want help!", "false"); opts->defineOption("g", "gold", PgOpt::STRING_OPT, "A String representing the Gold file to check against - the perfect/reference file", ""); opts->defineOption("n", "gen", PgOpt::STRING_OPT, "A String representing the Generated File to check - the test file", ""); opts->defineOption("s", "skipLines", PgOpt::INT_OPT, "An integer that signifies the number of lines to skip before comparing", "0"); 
opts->defineOption("v", "expectedRetVal", PgOpt::INT_OPT, "An integer value representing the desired return value used to flag a positive or negative test. 0 is positive, 1 is negative", "0"); opts->defineOption("t", "testName", PgOpt::STRING_OPT, "Name of the Test being Run", ""); //Parse argv and store it within PgOptions class variables //Match the command line arguments to the proper options opts->parseArgv(argv); //DEBUG //opts->dump(); if(opts->getBool("help") == true) { opts->usage(); return passed; } std::string gen; std::string gold; if(opts->get("gen") != "") { gen = opts->get("gen"); } else { cout<<"Error: No value for Generated File (gen) given. Exiting"<get("gold") != "") { gold = opts->get("gold"); } else { cout<<"Error: No value for Gold File (gold) given. Exiting"<getInt("skipLines"); int expectedRetVal = opts->getInt("expectedRetVal"); std::string testName = opts->get("testName"); //Run Check TextFileCheck* tCheck; tCheck = new TextFileCheck(gen, gold, skipLines); std::string errorMsg = ""; bool pass = tCheck->check(errorMsg); //Test for Regular Pass (Positive) if(expectedRetVal == 0) { if(!pass) { cout<<"Error in " + testName + "(): " + errorMsg< #include #include #include // #ifndef WIN32 #include #endif /* WIN32 */ using namespace std; /** Everybody's favorite function... */ int main(int argc, char *argv[]) { ofstream logOut; LogStream log; string logName; bool closeLog = false; try { const string version = AptVersionInfo::version(); const string cvsId = AptVersionInfo::cvsId(); const string versionToReport = AptVersionInfo::versionToReport(); /* Parse options. */ PgOptions opts; opts.setUsage("apt-engine-wrapper - Generic wrapper around the engine\n" "factory. You probably want to use one of the engine\n" "specific wrappers. 
Use ' -- ' to separate wrapper args\n" "from engine args.\n\n" "usage:\n" " apt-engine-wrapper --help \n" " apt-engine-wrapper --engine ProbesetSummarizeEngine --help \n" " apt-engine-wrapper --engine ProbesetSummarizeEngine -- \\\n" " --cdf-file=... \n\n"); opts.defineOption("", "engine", PgOpt::STRING_OPT, "The engine to run.", ""); opts.defineOption("", "log-file", PgOpt::STRING_OPT, "The file to write out log items to.", ""); opts.defineOption("h", "help", PgOpt::BOOL_OPT, "This message.", "false"); opts.defineOption("", "version", PgOpt::BOOL_OPT, "Display version information.", "false"); opts.defineOption("", "verbose", PgOpt::INT_OPT, "Verbosity Level.", "1"); int argvPos = opts.parseArgv(argv); const string progName = Fs::basename(opts.getProgName()); // does the user want the version if(opts.getBool("version")) { cout << "version: " << versionToReport << endl; exit(0); } // Do we need help? (I know I do...) else if(opts.getBool("help") || argc == 1) { set toHide; opts.usage(toHide, true); if(opts.get("engine")!= "") { BaseEngine *engine = EngineReg::CreateEngine(opts.get("engine")); if(engine == NULL) Err::errAbort("Unable to create engine named '"+opts.get("engine")+"'"); engine->optionUsage(toHide, true); } else { list engineNames = EngineReg::GetEngineNames(); cout << endl << "Available Engines:" << endl; for(list::iterator iter = engineNames.begin(); iter != engineNames.end(); iter++) { cout << " " << *iter << endl; } } cout << endl << "version: " << versionToReport << endl; exit(0); } else if(opts.get("engine")!= "") { BaseEngine *engine = EngineReg::CreateEngine(opts.get("engine")); if(engine == NULL) Err::errAbort("Unable to create engine named '"+opts.get("engine")+"'"); /* Set up the logging and message handlers. 
*/ Verbose::setLevel(opts.getInt("verbose")); if(opts.get("log-file") != "") { string logName = opts.get("log-file"); Fs::mustOpenToWrite(logOut, logName.c_str()); log.setStream(&logOut); Verbose::pushMsgHandler(&log); Verbose::pushProgressHandler(&log); Verbose::pushWarnHandler(&log); closeLog = true; } engine->parseArgv(argv,argvPos+1); engine->run(); } } catch(...) { Verbose::out(1,"Unexpected Error: uncaught exception."); // Close log files if(closeLog) { logOut.close(); } return 1; } // Close log files if(closeLog) { logOut.close(); } return 0; } affxparser/src/fusion/util/apt-engine-wrapper.vcproj0000644000175200017520000002057314516003651023746 0ustar00biocbuildbiocbuild affxparser/src/fusion/util/chksum.cpp0000644000175200017520000000316214516003651021005 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include "util/chksum.h" using namespace affxutil; /* * Store the one's complement of the checksum in the header of the data. * To validate the data, compute the one's complement of the data. The * resulting checksum should be zero. 
*/ uint16_t CheckSum::OnesComplementCheckSum(void *addr, size_t size) { uint16_t *pdata = (uint16_t *)addr; uint32_t sum = 0; uint16_t checksum; // Compute Internet Checksum for "size" bytes // beginning at location "addr". while( size > 1 ) { sum += *pdata++; size -= 2; } // Add left-over byte, if any if( size > 0 ) sum += * (unsigned char *) pdata; // Fold 32-bit sum to 16 bits while (sum>>16) sum = (sum & 0xffff) + (sum >> 16); // Compute the one's complement of the checksum. checksum = (uint16_t) ~sum; return checksum; } affxparser/src/fusion/util/chksum.h0000644000175200017520000000267614516003651020463 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #ifndef _UTIL_CHECKSUM_H #define _UTIL_CHECKSUM_H /** * @file chksum.h * @brief This file provides functions for computing a checksum. */ // #include "portability/affy-base-types.h" // #include // namespace affxutil { /*! A class to provide checksum functions */ class CheckSum { public: /*! * Computes a one's complement checksum. * * @param addr The memory address of the data. * @param size The number of bytes pointed to by the memory address. * @return The ones compliment checksum. 
*/ static uint16_t OnesComplementCheckSum(void *addr, size_t size); }; }; #endif // _UTIL_CHECKSUM_H affxparser/src/fusion/util/dump-guid.cpp0000644000175200017520000000253614516003651021412 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include "util/Guid.h" // #include #include // #ifndef WIN32 #include #endif int main(int argc,char* argv[]) { // #ifndef WIN32 printf("time =%10d\n",(int)time(NULL) ); printf("pid =%10d\n",(int)getpid() ); printf("host =%10d\n",(int)gethostid()); #endif affxutil::Guid guidmaker; affxutil::GuidType guid; int n = 5; if(argc>1) { n = atoi(argv[1]); } for (int i=0;i #include #include #include #ifdef _MSC_VER #pragma warning( disable: 4244 ) #endif ////////// #define PUT_64BIT_LE(cp, value) do { \ (cp)[7] = (value) >> 56; \ (cp)[6] = (value) >> 48; \ (cp)[5] = (value) >> 40; \ (cp)[4] = (value) >> 32; \ (cp)[3] = (value) >> 24; \ (cp)[2] = (value) >> 16; \ (cp)[1] = (value) >> 8; \ (cp)[0] = (value); } while (0) #define PUT_32BIT_LE(cp, value) do { \ (cp)[3] = (value) >> 24; \ (cp)[2] = (value) >> 16; \ (cp)[1] = (value) >> 8; \ (cp)[0] = (value); } while (0) static u_int8_t PADDING[MD5_BLOCK_LENGTH] = { 0x80, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /// @brief Start MD5 accumulation. Set bit count /// to 0 and buffer to mysterious initialization constants. /// @param ctx The context structure to init void MD5Init(MD5_CTX *ctx) { ctx->count = 0; ctx->state[0] = 0x67452301; ctx->state[1] = 0xefcdab89; ctx->state[2] = 0x98badcfe; ctx->state[3] = 0x10325476; } /// @brief Update context to reflect the concatenation of another buffer full of bytes. /// @param ctx The context to update /// @param input pointer to the buffer of bytes /// @param len buffer length void MD5Update(MD5_CTX *ctx, const unsigned char *input, u_int32_t len) { u_int32_t have, need; /* Check how many bytes we already have and how many more we need. */ have = (u_int32_t)((ctx->count >> 3) & (MD5_BLOCK_LENGTH - 1)); need = MD5_BLOCK_LENGTH - have; /* Update bitcount */ ctx->count += (u_int64_t)len << 3; if (len >= need) { if (have != 0) { memcpy(ctx->buffer + have, input, need); MD5Transform(ctx->state, ctx->buffer); input += need; len -= need; have = 0; } /* Process data in MD5_BLOCK_LENGTH-byte chunks. */ while (len >= MD5_BLOCK_LENGTH) { MD5Transform(ctx->state, input); input += MD5_BLOCK_LENGTH; len -= MD5_BLOCK_LENGTH; } } /* Handle any remaining bytes of data. */ if (len != 0) memcpy(ctx->buffer + have, input, len); } /// @brief Final wrapup - pad to 64-byte boundary with the bit pattern /// 1 0* (64-bit count of bits processed, MSB-first) /// @param digest The output digest /// @param ctx The content to finalize void MD5Final(unsigned char digest[MD5_DIGEST_LENGTH], MD5_CTX *ctx) { u_int8_t count[8]; u_int32_t padlen; int i; /* Convert count to 8 bytes in little endian order. */ PUT_64BIT_LE(count, ctx->count); /* Pad out to 56 mod 64. 
*/ padlen = MD5_BLOCK_LENGTH - ((ctx->count >> 3) & (MD5_BLOCK_LENGTH - 1)); if (padlen < 1 + 8) padlen += MD5_BLOCK_LENGTH; MD5Update(ctx, PADDING, padlen - 8); /* padlen - 8 <= 64 */ MD5Update(ctx, count, 8); if (digest != NULL) { for (i = 0; i < 4; i++) PUT_32BIT_LE(digest + i * 4, ctx->state[i]); } //bzero(ctx, sizeof(*ctx)); /* in case it's sensitive */ memset(ctx, 0, sizeof(*ctx)); } /* The four core functions - F1 is optimized somewhat */ /* #define F1(x, y, z) (x & y | ~x & z) */ #define F1(x, y, z) (z ^ (x & (y ^ z))) #define F2(x, y, z) F1(z, x, y) #define F3(x, y, z) (x ^ y ^ z) #define F4(x, y, z) (y ^ (x | ~z)) /* This is the central step in the MD5 algorithm. */ #define MD5STEP(f, w, x, y, z, data, s) \ ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) /** * @brief * The core of the MD5 algorithm, this alters an existing MD5 hash to * reflect the addition of 16 longwords of new data. MD5Update blocks * the data and converts bytes into longwords for this routine. */ void MD5Transform(u_int32_t state[4], const u_int8_t block[MD5_BLOCK_LENGTH]) { u_int32_t a, b, c, d, in[MD5_BLOCK_LENGTH / 4]; #if BYTE_ORDER == LITTLE_ENDIAN memcpy(in,block, sizeof(in)); #else for (a = 0; a < MD5_BLOCK_LENGTH / 4; a++) { in[a] = (u_int32_t)( (u_int32_t)(block[a * 4 + 0]) | (u_int32_t)(block[a * 4 + 1]) << 8 | (u_int32_t)(block[a * 4 + 2]) << 16 | (u_int32_t)(block[a * 4 + 3]) << 24); } #endif a = state[0]; b = state[1]; c = state[2]; d = state[3]; MD5STEP(F1, a, b, c, d, in[ 0] + 0xd76aa478, 7); MD5STEP(F1, d, a, b, c, in[ 1] + 0xe8c7b756, 12); MD5STEP(F1, c, d, a, b, in[ 2] + 0x242070db, 17); MD5STEP(F1, b, c, d, a, in[ 3] + 0xc1bdceee, 22); MD5STEP(F1, a, b, c, d, in[ 4] + 0xf57c0faf, 7); MD5STEP(F1, d, a, b, c, in[ 5] + 0x4787c62a, 12); MD5STEP(F1, c, d, a, b, in[ 6] + 0xa8304613, 17); MD5STEP(F1, b, c, d, a, in[ 7] + 0xfd469501, 22); MD5STEP(F1, a, b, c, d, in[ 8] + 0x698098d8, 7); MD5STEP(F1, d, a, b, c, in[ 9] + 0x8b44f7af, 12); MD5STEP(F1, c, d, a, b, in[10] + 
0xffff5bb1, 17); MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); MD5STEP(F2, a, b, c, d, in[ 1] + 0xf61e2562, 5); MD5STEP(F2, d, a, b, c, in[ 6] + 0xc040b340, 9); MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); MD5STEP(F2, b, c, d, a, in[ 0] + 0xe9b6c7aa, 20); MD5STEP(F2, a, b, c, d, in[ 5] + 0xd62f105d, 5); MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); MD5STEP(F2, b, c, d, a, in[ 4] + 0xe7d3fbc8, 20); MD5STEP(F2, a, b, c, d, in[ 9] + 0x21e1cde6, 5); MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); MD5STEP(F2, c, d, a, b, in[ 3] + 0xf4d50d87, 14); MD5STEP(F2, b, c, d, a, in[ 8] + 0x455a14ed, 20); MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); MD5STEP(F2, d, a, b, c, in[ 2] + 0xfcefa3f8, 9); MD5STEP(F2, c, d, a, b, in[ 7] + 0x676f02d9, 14); MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); MD5STEP(F3, a, b, c, d, in[ 5] + 0xfffa3942, 4); MD5STEP(F3, d, a, b, c, in[ 8] + 0x8771f681, 11); MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); MD5STEP(F3, a, b, c, d, in[ 1] + 0xa4beea44, 4); MD5STEP(F3, d, a, b, c, in[ 4] + 0x4bdecfa9, 11); MD5STEP(F3, c, d, a, b, in[ 7] + 0xf6bb4b60, 16); MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); MD5STEP(F3, d, a, b, c, in[ 0] + 0xeaa127fa, 11); MD5STEP(F3, c, d, a, b, in[ 3] + 0xd4ef3085, 16); MD5STEP(F3, b, c, d, a, in[ 6] + 0x04881d05, 23); MD5STEP(F3, a, b, c, d, in[ 9] + 0xd9d4d039, 4); MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); MD5STEP(F3, b, c, d, a, in[2 ] + 0xc4ac5665, 23); MD5STEP(F4, a, b, c, d, in[ 0] + 0xf4292244, 6); MD5STEP(F4, d, a, b, c, in[7 ] + 0x432aff97, 10); MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 
15); MD5STEP(F4, b, c, d, a, in[5 ] + 0xfc93a039, 21); MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); MD5STEP(F4, d, a, b, c, in[3 ] + 0x8f0ccc92, 10); MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); MD5STEP(F4, b, c, d, a, in[1 ] + 0x85845dd1, 21); MD5STEP(F4, a, b, c, d, in[8 ] + 0x6fa87e4f, 6); MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); MD5STEP(F4, c, d, a, b, in[6 ] + 0xa3014314, 15); MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); MD5STEP(F4, a, b, c, d, in[4 ] + 0xf7537e82, 6); MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); MD5STEP(F4, c, d, a, b, in[2 ] + 0x2ad7d2bb, 15); MD5STEP(F4, b, c, d, a, in[9 ] + 0xeb86d391, 21); state[0] += a; state[1] += b; state[2] += c; state[3] += d; } affxparser/src/fusion/util/md5.h0000644000175200017520000000276414516003651017654 0ustar00biocbuildbiocbuild/* This code was imported from OpenBSD. It is on the * COLIN_PLUMB_MD5 branch should it need updating. -jhg * * Before using these functions directly look at * "md5sum.cpp" which defines an "md5sum" class which should * make things eaiser to use from c++. */ /* $OpenBSD: md5.h,v 1.1.2.1 2004/06/05 23:12:36 niklas Exp $ */ /* * This code implements the MD5 message-digest algorithm. * The algorithm is due to Ron Rivest. This code was * written by Colin Plumb in 1993, no copyright is claimed. * This code is in the public domain; do with it what you wish. * * Equivalent code is available from RSA Data Security, Inc. * This code has been tested against that, and is equivalent, * except that you don't need to include two pages of legalese * with every copy. */ #ifndef _MD5_H_ #define _MD5_H_ // #include // #include // // some typedefs to smooth things over. 
typedef uint8_t u_int8_t; typedef uint32_t u_int32_t; typedef uint64_t u_int64_t; #define MD5_BLOCK_LENGTH 64 #define MD5_DIGEST_LENGTH 16 /// @brief The MD5 state context typedef struct MD5Context { u_int32_t state[4]; ///< state u_int64_t count; ///< number of bits, mod 2^64 u_int8_t buffer[MD5_BLOCK_LENGTH]; ///< input buffer } MD5_CTX; void MD5Init(MD5_CTX *); void MD5Update(MD5_CTX *, const u_int8_t *, u_int32_t); void MD5Final(u_int8_t [MD5_DIGEST_LENGTH], MD5_CTX *); void MD5Transform(u_int32_t [4], const u_int8_t [MD5_BLOCK_LENGTH]); #endif /* _MD5_H_ */ affxparser/src/fusion/util/md5sum.cpp0000644000175200017520000001406414516003651020730 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// // #include // #include #include // #include #include #include #include #include #ifdef _WIN32 #include #endif #ifndef _MSC_VER #include #include #endif using namespace std; /// @brief Creation of md5sum an object /// @return an md5sum affx::md5sum::md5sum() { init(); } /// @brief Init the starting state of the MD5 operation /// @return non-zero on error int affx::md5sum::init() { MD5Init(&m_context); return 0; } /// @brief update the MD5 state with a block of data /// @param buf Pointer to buffer of data /// @param buf_size Number of bytes in the buffer /// @return non-zero on error int affx::md5sum::update(void* buf,uint32_t buf_size) { MD5Update(&m_context,(const u_int8_t*)buf,buf_size); return 0; } /// @brief update the MD5 state with a string /// @param str String to add to the MD5 /// @return non-zero on error int affx::md5sum::update(const std::string& str) { MD5Update(&m_context,(const u_int8_t*)str.c_str(),(u_int32_t)str.size()); return 0; } int affx::md5sum::update(char const * const str) { MD5Update(&m_context,(const u_int8_t*)str,(u_int32_t)strlen(str)); return 0; } /// @brief update the MD5 state with a int in network byte order. /// @param val Value to update the md5 with. /// @return non-zero on error int affx::md5sum::update_nbo(int32_t val) { uint32_t val_bigendian=htonl((uint32_t)val); MD5Update(&m_context,(uint8_t*)&val_bigendian,sizeof(uint32_t)); return 0; } /// @brief update the MD5 state with a int in network byte order. /// @param val Value to update the md5 with. 
/// @return non-zero on error int affx::md5sum::update_nbo(uint32_t val) { uint32_t val_bigendian=htonl(val); MD5Update(&m_context,(uint8_t*)&val_bigendian,sizeof(uint32_t)); return 0; } /// @brief update the MD5 state with a int in network byte order. /// @param val Value to update the md5 with. /// @return non-zero on error int affx::md5sum::update_nbo(uint16_t val) { uint16_t val_bigendian=htons(val); MD5Update(&m_context,(uint8_t*)&val_bigendian,sizeof(uint16_t)); return 0; } /// @brief update the MD5 state with a int in network byte order. /// @param val Value to update the md5 with. /// @return non-zero on error int affx::md5sum::update_nbo(uint8_t val) { // single byte, no swap needed. MD5Update(&m_context,(uint8_t*)&val,sizeof(uint8_t)); return 0; } /// @brief Update the md5 with all the values of the vector /// @param vec vector of ints to add to the checksum. /// @return int affx::md5sum::update_nbo(const std::vector& vec) { for (int i=0;i>4); MD5SUM_NIB_APPEND(sum,bits&0x0f); } // prepare for the next round init(); return 0; } ////////// /// @brief Compute the MD5 of a std::string /// @param str string to checksum /// @param sum output checksum /// @return non-zero on error int affx::md5sum::ofString(std::string str,std::string& sum) { init(); update(str); return final(sum); } /// @brief Compute the MD5 of a file /// @param filename name of the file to /// @param sum /// @return non-zero on error int affx::md5sum::ofFile(const std::string filename,std::string& sum) { ifstream istm; char* buf_ptr; int buf_size=(1<<12); // 4MB // clear the output now in case of an error later. 
sum=""; init(); buf_ptr=new char[buf_size]; assert(buf_ptr!=NULL); std::string uncfilename=Fs::convertToUncPath(filename); //istm.exceptions(ios_base::bad_bit|ios_base::fail_bit); istm.open(uncfilename.c_str(),ios_base::binary); if (istm.fail()) { delete[] buf_ptr; return -1; } do { istm.read(buf_ptr,buf_size); update(buf_ptr,(int)istm.gcount()); } while (!istm.eof()); delete[] buf_ptr; istm.close(); return final(sum); } affxparser/src/fusion/util/md5sum.h0000644000175200017520000000360014516003651020367 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. // // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /// @file md5sum.h /// @brief C++ utility functions to make computing MD5s simple // #ifndef _MSC_VER #include #endif // #include // #include #include #include namespace affx { class md5sum; } /// @brief an object oriented interface to generating MD5s. /// provides methods to do strings and files with one call. 
class affx::md5sum { public: MD5Context m_context; ///< The basic MD5 context as wrapped by md5sum // md5sum(); // int init(); int update(void* buff,uint32_t buf_size); int update(const std::string& str); int update(char const * const str); // int update_nbo(int32_t val); int update_nbo(uint32_t val); int update_nbo(uint16_t val); int update_nbo(uint8_t val); // int update_nbo(const std::vector& vec); // not needed for now. // int update_nbo(val); // int update(double val); // int final(std::string& str); // handy methods int ofString(std::string str,std::string& sum); int ofFile(std::string filename,std::string& sum); }; affxparser/src/fusion/util/test-md5sum.cpp0000644000175200017520000000521014516003651021676 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// #include // #include #include #include #include #include // using namespace std; using namespace affx; const char* self_test_data[]={ // input, output "","d41d8cd98f00b204e9800998ecf8427e", "a","0cc175b9c0f1b6a831c399e269772661", "The quick brown fox jumps over the lazy dog","9e107d9d372bb6826bd81d3542a419d6", "The quick brown fox jumps over the lazy cog","1055d3e698d289f2af8663725127bd4b", NULL, NULL }; void self_test() { md5sum md5sum; std::string sum; const char** dataptr; dataptr=self_test_data; while (*dataptr!=NULL) { // generate the hashes. (we could have used "ofString()") md5sum.init(); md5sum.update(*dataptr); md5sum.final(sum); // printf("%-15s: \"%s\"\n%15s: %32s\n%15s: %32s\n\n", "String", *dataptr, "Reference", *(dataptr+1), "Computed", sum.c_str()); // computed MD5 should equal reference assert(sum==*(dataptr+1)); // step input and reference. dataptr+=2; } } ////////// void nbo_test_int(unsigned int val,const std::string& ref) { md5sum md5sum; std::string sum; md5sum.init(); md5sum.update_nbo(val); md5sum.final(sum); printf("0x%08x -> %32s\n",val,sum.c_str()); assert(sum==ref); } void nbo_test() { md5sum md5sum; std::string sum; // nbo_test_int( 0,"f1d3ff8443297732862df21dc4e57262"); nbo_test_int( 1,"f1450306517624a57eafbbf8ed995985"); nbo_test_int(123456,"d747074027ce566f5dd8697337091787"); } // int main(int argc,char* argv[]) { if (argc==1) { self_test(); nbo_test(); } else { for (int a=1;a #include #include #include #include #include // ////////// int opt_debug=0; int opt_verbose=0; ////////// /// @brief Dump the filename to the file as test data. 
/// @param filename filename to create void test_write_data(const std::string& filename) { assert(!Fs::isReadable(filename)); std::ofstream outs; outs.open(filename.c_str()); outs<setPrefix("tff-test1-"); tff->setDebug(opt_debug); tff->setVerbose(opt_verbose); // for (int i=0;i<10;i++) { // this allocates a tmp file name and remembers it. tfn=tff->genFilename(); if (opt_verbose>0) { printf("%4d : '%s'\n",i,tfn.c_str()); } // put something in the file. test_write_data(tfn); } // When the object is deleted, it cleans up all the tmpfiles. delete tff; } /// this function has a TmpFileFactory pointer passed into it. /// the allocations are done within its context. void test_2_helper(TmpFileFactory* tff,int cnt) { for (int i=0;igenFilename().c_str()); } } void test_2() { TmpFileFactory* tff=new TmpFileFactory(); tff->setPrefix("tff-test2-"); tff->setDebug(opt_debug); tff->setVerbose(opt_verbose); // passing the context into a fuctions test_2_helper(tff,5); test_2_helper(tff,5); // now clean up. delete tff; } void test_3() { TmpFileFactory* tff=new TmpFileFactory(); tff->setPrefix("tff-test3-"); tff->setDebug(opt_debug); tff->setVerbose(opt_verbose); // for (int i=0;i<10;i++) { tff->genDirname(); } // make a subdir and allocate future tmp files in that subdir. tff->setTmpdir(tff->genDirname()); // test_2_helper(tff,5); test_2_helper(tff,5); // delete tff; } void test_4() { // generate 10 dirs for fun. for (int i=0;i<10;i++) { GlobalTmpFileFactory()->genDirname(); } // set the dir to /var/tmp on unix. (Where on windows?) GlobalTmpFileFactory()->setTmpdir(GlobalTmpFileFactory()->getSystemTmpDir()); // gen a subdir in that directory and set our tmp dir to dir we made. GlobalTmpFileFactory()->setTmpdir(GlobalTmpFileFactory()->genDirname()); // generate 10 filenames in the new tmp dir for fun. 
for (int i=0;i<10;i++) { test_write_data(GlobalTmpFileFactory()->genFilename()); } } ////////// int main(int argc,char* argv[]) { PgOptions* opts=new PgOptions(); // opts->setUsage("Example and test program for generating test filenames." "\n" ); // opts->defineOption("h","help", PgOpt::BOOL_OPT, "Show help for the program.", "false"); opts->defineOption("v","verbose",PgOpt::INT_OPT, "verbose level", "0"); opts->defineOption("d","debug",PgOpt::INT_OPT, "debugging level.\n" "0 => None.\n" "1 => Print the tmp filename as they are generated.\n" "2 => Dont remove tmp files, leave them there for analysis.\n", "0"); // opts->parseArgv(argv); // opt_debug =opts->getInt("debug"); opt_verbose=opts->getInt("verbose"); // set some options. (this is optional.) GlobalTmpFileFactory()->setPrefix("tff-global-"); GlobalTmpFileFactory()->setDebug(opt_debug); GlobalTmpFileFactory()->setVerbose(opt_verbose); // test_1(); test_2(); test_3(); test_4(); // need to clean up when exiting. GlobalTmpFileFactoryFree(); // printf("ok.\n"); } affxparser/src/fusion/util/util-meminfo.cpp0000644000175200017520000000473114516003651022123 0ustar00biocbuildbiocbuild//////////////////////////////////////////////////////////////// // // Copyright (C) 2005 Affymetrix, Inc. // // This library is free software; you can redistribute it and/or modify // it under the terms of the GNU Lesser General Public License // (version 2.1) as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License // for more details. 
// // You should have received a copy of the GNU Lesser General Public License // along with this library; if not, write to the Free Software Foundation, Inc., // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //////////////////////////////////////////////////////////////// /// @file util-meminfo.cpp /// @brief prints the results of our meminfo functions for testing. #include "util/Util.h" #define B_FMT "%12llu " #define B_CVT(x) ((long long unsigned int)x) #define M_FMT "%10.3fMB " #define M_CVT(x) (x/(1024.0*1024)) int main(int argc,char* argv[]) { uint64_t free; uint64_t total; uint64_t swapAvail; uint64_t memAvail; #ifdef __linux__ if (argc==2) { printf("Reading linux /proc/meminfo from '%s' for testing:\n",argv[1]); memInfo_linux(argv[1],free,total,swapAvail,memAvail); } else { Util::memInfo(free,total,swapAvail,memAvail,false); } #else Util::memInfo(free,total,swapAvail,memAvail,false); #endif printf("meminfo: free=" B_FMT "total=" B_FMT "swapAvail=" B_FMT "memAvail=" B_FMT "\n", B_CVT(free),B_CVT(total),B_CVT(swapAvail),B_CVT(memAvail)); printf("meminfo: free=" M_FMT "total=" M_FMT "swapAvail=" M_FMT "memAvail=" M_FMT "\n", M_CVT(free),M_CVT(total),M_CVT(swapAvail),M_CVT(memAvail)); // and again with a cap #ifdef __linux__ if (argc==2) { printf("Reading linux /proc/meminfo from '%s' for testing:\n",argv[1]); memInfo_linux(argv[1],free,total,swapAvail,memAvail); } else { Util::memInfo(free,total,swapAvail,memAvail,true); } #else Util::memInfo(free,total,swapAvail,memAvail,true); #endif printf("meminfo (cap): free=" B_FMT "total=" B_FMT "swapAvail=" B_FMT "memAvail=" B_FMT "\n", B_CVT(free),B_CVT(total),B_CVT(swapAvail),B_CVT(memAvail)); printf("meminfo (cap): free=" M_FMT "total=" M_FMT "swapAvail=" M_FMT "memAvail=" M_FMT "\n", M_CVT(free),M_CVT(total),M_CVT(swapAvail),M_CVT(memAvail)); return 0; } affxparser/tests/0000755000175200017520000000000014516003651015100 
5ustar00biocbuildbiocbuildaffxparser/tests/convertCel.R0000644000175200017520000000541414516003651017333 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") ## rawData/ pathR <- system.file(package="AffymetrixDataTestFiles", mustWork=TRUE) pathR <- file.path(pathR, "rawData") ## File #1: Test3 chipType <- "Test3" path <- file.path(pathR, "FusionSDK_Test3", chipType, "2.Calvin") filename <- list.files(path=path, pattern="[.]CEL$")[1] pathname <- file.path(path, filename) hdr <- readCelHeader(pathname) str(hdr) stopifnot(hdr$chiptype == chipType) filename4 <- gsub(".CEL", ",v4.CEL", filename) pathname4 <- convertCel(pathname, filename4, verbose=TRUE, .validate=TRUE) print(pathname4) hdr4 <- readCelHeader(pathname4) str(hdr4) stopifnot(hdr4$chiptype == hdr$chiptype) ## New chip type newChipType <- sprintf("%s-custom", chipType) filename4 <- gsub(".CEL", ",v4,custom.CEL", filename) pathname4 <- convertCel(pathname, filename4, newChipType=newChipType, verbose=TRUE) print(pathname4) hdr4 <- readCelHeader(pathname4) str(hdr4) ## FIXME ## stopifnot(hdr4$chiptype == newChipType) ## File #2: FusionSDK_HG-U133A chipType <- "HG-U133A" path <- file.path(pathR, "FusionSDK_HG-U133A", chipType, "2.Calvin") filename <- list.files(path=path, pattern="[.]CEL$")[1] pathname <- file.path(path, filename) hdr <- readCelHeader(pathname) str(hdr) stopifnot(hdr$chiptype == chipType) filename4 <- gsub(".CEL", ",v4.CEL", filename) pathname4 <- convertCel(pathname, filename4, verbose=TRUE, .validate=TRUE) print(pathname4) hdr4 <- readCelHeader(pathname4) str(hdr4) stopifnot(hdr4$chiptype == hdr$chiptype) ## New chip type newChipType <- sprintf("%s-custom", chipType) filename4 <- gsub(".CEL", ",v4,custom.CEL", filename) pathname4 <- convertCel(pathname, filename4, newChipType=newChipType, verbose=TRUE) print(pathname4) hdr4 <- readCelHeader(pathname4) str(hdr4) stopifnot(hdr4$chiptype == newChipType) ## File #3: FusionSDK_Focus chipType <- "HG-Focus" path 
<- file.path(pathR, "FusionSDK_HG-Focus", chipType, "2.Calvin") filename <- list.files(path=path, pattern="[.]CEL$")[1] pathname <- file.path(path, filename) hdr <- readCelHeader(pathname) str(hdr) stopifnot(hdr$chiptype == chipType) filename4 <- gsub(".CEL", ",v4.CEL", filename) pathname4 <- convertCel(pathname, filename4, verbose=TRUE, .validate=TRUE) print(pathname4) hdr4 <- readCelHeader(pathname4) str(hdr4) stopifnot(hdr4$chiptype == hdr$chiptype) ## New chip type newChipType <- sprintf("%s-custom", chipType) filename4 <- gsub(".CEL", ",v4,custom.CEL", filename) pathname4 <- convertCel(pathname, filename4, newChipType=newChipType, verbose=TRUE) print(pathname4) hdr4 <- readCelHeader(pathname4) str(hdr4) ## FIXME ## stopifnot(hdr4$chiptype == newChipType) } # if (require("AffymetrixDataTestFiles")) affxparser/tests/readCdfDataFrame.R0000644000175200017520000000274214516003651020325 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathA <- file.path(pathR, "annotationData", "chipTypes", "Test3") # Read CDF structure cdf <- file.path(pathA, "1.XDA", "Test3.CDF") hdr <- readCdfHeader(cdf) Jall <- hdr$nunits # Various sets of indices to be read idxsList <- list( ## readNothing=integer(0L), # FIX ME readAll=NULL, readOne=10L, readSome=11:20, readDouble=as.double(11:20), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) # Read full file data <- readCdfDataFrame(cdf) str(data) stopifnot(length(unique(data$unitName)) == Jall) # Read different subsets of units for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing readCdfDataFrame() with '%s' indices...", name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) if (grepl("outOfRange", name)) { res <- tryCatch(readCdfDataFrame(cdf, units=idxs), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- readCdfDataFrame(cdf, units=idxs) str(data) units <- if (is.null(idxs)) 
seq_len(Jall) else as.integer(idxs) stopifnot(length(unique(data$unitName)) == length(units)) stopifnot(identical(sort(unique(data$unit)), units)) } message(sprintf("Testing readCdfDataFrame() with '%s' indices...done", name)) } # for (ii ...) } # if (require("AffymetrixDataTestFiles")) affxparser/tests/readCdfQc.R0000644000175200017520000000252014516003651017036 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathA <- file.path(pathR, "annotationData", "chipTypes", "Test3") # Read CDF structure cdf <- file.path(pathA, "1.XDA", "Test3.CDF") hdr <- readCdfHeader(cdf) Jall <- hdr$nqcunits # Various sets of indices to be read idxsList <- list( ## readNothing=integer(0L), # FIX ME readAll=NULL, readOne=5L, readSome=5:10, readDouble=as.double(5:10), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) # Read full file data <- readCdfQc(cdf) str(head(data)) stopifnot(length(data) == Jall) # Read different subsets of units for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing readCdfQc() with '%s' indices...", name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) if (grepl("outOfRange", name)) { res <- tryCatch(readCdfQc(cdf, units=idxs), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- readCdfQc(cdf, units=idxs) str(head(data)) J <- if (is.null(idxs)) Jall else length(idxs) stopifnot(length(data) == J) } message(sprintf("Testing readCdfQc() with '%s' indices...done", name)) } # for (ii ...) 
} # if (require("AffymetrixDataTestFiles")) affxparser/tests/readCdfUnitsWriteMap.R0000644000175200017520000000247714516003651021261 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathA <- file.path(pathR, "annotationData", "chipTypes", "Test3") # Read CDF structure cdf <- file.path(pathA, "1.XDA", "Test3.CDF") hdr <- readCdfHeader(cdf) Jall <- hdr$nunits # Various sets of indices to be read idxsList <- list( ## readNothing=integer(0L), # FIX ME readAll=NULL, readOne=10L, readSome=11:20, readDouble=as.double(11:20), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) # Read full file data <- readCdfUnitsWriteMap(cdf) str(data) Jall <- length(data) # Read different subsets of units for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing readCdfUnitsWriteMap() with '%s' indices...", name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) if (grepl("outOfRange", name)) { res <- tryCatch(readCdfUnitsWriteMap(cdf, units=idxs), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- readCdfUnitsWriteMap(cdf, units=idxs) str(data) stopifnot(length(data) == Jall) } message(sprintf("Testing readCdfUnitsWriteMap() with '%s' indices...done", name)) } # for (ii ...) 
} # if (require("AffymetrixDataTestFiles")) affxparser/tests/readCdfUnits_etal.R0000644000175200017520000000345114516003651020606 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathA <- file.path(pathR, "annotationData", "chipTypes", "Test3") # Read CDF structure cdf <- file.path(pathA, "1.XDA", "Test3.CDF") hdr <- readCdfHeader(cdf) Jall <- hdr$nunits # Various sets of indices to be read idxsList <- list( ## readNothing=integer(0L), # FIX ME readAll=NULL, readOne=10L, readSome=11:20, readDouble=as.double(11:20), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) fcnNames <- c( "readCdf", "readCdfUnits", "readCdfUnitNames", "readCdfNbrOfCellsPerUnitGroup", "readCdfGroupNames", "readCdfCellIndices", "readCdfIsPm" ) # Read full file for (fcnName in fcnNames) { fcn <- get(fcnName, mode="function", envir=getNamespace("affxparser")) data <- fcn(cdf) str(head(data)) stopifnot(length(data) == Jall) } # for (fcn ...) for (fcnName in fcnNames) { fcn <- get(fcnName, mode="function", envir=getNamespace("affxparser")) # Read different subsets of units for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing %s() with '%s' indices...", fcnName, name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) if (grepl("outOfRange", name)) { res <- tryCatch(readCdfQc(cdf, units=idxs), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- fcn(cdf, units=idxs) str(head(data)) J <- if (is.null(idxs)) Jall else length(idxs) stopifnot(length(data) == J) } message(sprintf("Testing %s() with '%s' indices...done", fcnName, name)) } # for (ii ...) } # for (fcn ...) 
} # if (require("AffymetrixDataTestFiles")) affxparser/tests/readCel.R0000644000175200017520000000276214516003651016571 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathD <- file.path(pathR, "rawData", "FusionSDK_Test3", "Test3") # Find all CEL files cels <- list.files(path=pathD, pattern="[.]CEL$", recursive=TRUE, full.names=TRUE) # Various sets of indices to be read idxsList <- list( # readNothing=integer(0L), # FIX ME readAll=NULL, readOne=10L, readSome=11:20, readDouble=as.double(11:20), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) for (kk in seq_along(cels)) { cel <- cels[kk] # Read full file data <- readCel(cel) str(data) Jall <- data$header$total stopifnot(length(data$intensities) == Jall) # Read different subsets of cells for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing readCel() with '%s' indices...", name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) if (grepl("outOfRange", name)) { res <- tryCatch(readCel(cel, indices=idxs), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- readCel(cel, indices=idxs) str(data) J <- if (is.null(idxs)) Jall else length(idxs) stopifnot(length(data$intensities) == J) } message(sprintf("Testing readCel() with '%s' indices...done", name)) } # for (ii ...) } # for (kk ...) 
} # if (require("AffymetrixDataTestFiles")) affxparser/tests/readCelIntensities.R0000644000175200017520000000273114516003651021004 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathD <- file.path(pathR, "rawData", "FusionSDK_Test3", "Test3") # Find all CEL files path <- file.path(pathD, "2.Calvin") cels <- list.files(path=path, pattern="[.]CEL$", full.names=TRUE) hdr <- readCelHeader(cels[1L]) I <- length(cels) Jall <- hdr$total # Read full file data <- readCelIntensities(cels) str(data) stopifnot(all(dim(data) == c(Jall,I))) # Various sets of indices to be read idxsList <- list( # readNothing=integer(0L), # FIX ME readAll=NULL, readOne=10L, readSome=11:20, readDouble=as.double(11:20), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) # Read different subsets of cells for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing readCelIntensities() with '%s' indices...", name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) if (grepl("outOfRange", name)) { res <- tryCatch(readCelIntensities(cels, indices=idxs), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- readCelIntensities(cels, indices=idxs) str(data) J <- if (is.null(idxs)) Jall else length(idxs) stopifnot(all(dim(data) == c(J,I))) } message(sprintf("Testing readCelIntensities() with '%s' indices...done", name)) } # for (ii ...) } # if (require("AffymetrixDataTestFiles")) affxparser/tests/readCelRectangle.R0000644000175200017520000000204314516003651020406 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") rotate270 <- function(x, ...) 
{ x <- t(x) nc <- ncol(x) if (nc < 2) return(x) x[,nc:1,drop=FALSE] } # Search for some available CEL files pathR <- system.file(package="AffymetrixDataTestFiles") pathD <- file.path(pathR, "rawData", "FusionSDK_HG-Focus", "HG-Focus") cel <- file.path(pathD, "2.Calvin", "HG-Focus-1-121502.CEL") # Read CEL intensities in the upper left corner range <- c(0,250) data <- readCelRectangle(cel, xrange=range, yrange=range) # Displaying image z <- rotate270(data$intensities) sub <- sprintf("Chip type: %s", data$header$chiptype) image(z, col=gray.colors(256), axes=FALSE, main=basename(cel), sub=sub) text(x=0, y=1, labels="(0,0)", adj=c(0,-0.7), cex=0.8, xpd=TRUE) text(x=1, y=0, labels="(250,250)", adj=c(1,1.2), cex=0.8, xpd=TRUE) # Read 1x1 rectangle range <- c(0,0) data <- readCelRectangle(cel, xrange=range, yrange=range) print(data$intensities) stopifnot(all(dim(data$intensities) == c(1,1))) } affxparser/tests/readCelUnits.R0000644000175200017520000000311014516003651017600 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles")) { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathA <- file.path(pathR, "annotationData", "chipTypes", "Test3") pathD <- file.path(pathR, "rawData", "FusionSDK_Test3", "Test3") # Read CDF structure cdf <- file.path(pathA, "1.XDA", "Test3.CDF") hdr <- readCdfHeader(cdf) Jall <- hdr$nunits # Find all CEL files cels <- list.files(path=pathD, pattern="[.]CEL$", recursive=TRUE, full.names=TRUE) # Various sets of indices to be read idxsList <- list( ## readNothing=integer(0L), # FIX ME readAll=NULL, readOne=10L, readSome=11:20, readDouble=as.double(11:20), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) # Read full file data <- readCelUnits(cels, cdf=cdf) str(head(data)) stopifnot(length(data) == Jall) # Read different subsets of units for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing readCelUnits() with '%s' indices...", name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) 
if (grepl("outOfRange", name)) { res <- tryCatch(readCelUnits(cels, units=idxs, cdf=cdf), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- readCelUnits(cels, units=idxs, cdf=cdf) str(head(data)) J <- if (is.null(idxs)) Jall else length(idxs) stopifnot(length(data) == J) } message(sprintf("Testing readCelUnits() with '%s' indices...done", name)) } # for (ii ...) } # if (require("AffymetrixDataTestFiles")) affxparser/tests/readPgf.R0000644000175200017520000000577614516003651016612 0ustar00biocbuildbiocbuildif (require("AffymetrixDataTestFiles") && packageVersion("AffymetrixDataTestFiles") >= "0.4.0") { library("affxparser") pathR <- system.file(package="AffymetrixDataTestFiles") pathA <- file.path(pathR, "annotationData", "chipTypes", "HuGene-1_0-st-v1") # Read PGF structure pgf <- file.path(pathA, "HuGene-1_0-st-v1.r4,10_probesets.pgf") # NOTE: Hard-coded Jall <- 10L # Various sets of indices to be read idxsList <- list( ## readNothing=integer(0L), # FIX ME readAll=NULL, readOne=5L, readSome=1:5, readDouble=as.double(1:5), outOfRange=-1L, outOfRange=0L, outOfRange=1e9L ) data <- readPgf(pgf) str(head(data)) stopifnot(identical(data$header$chip_type, "HuGene-1_0-st-v1")) stopifnot(length(data$probesetName) == Jall) # Read different subsets of units for (ii in seq_along(idxsList)) { name <- names(idxsList)[ii] message(sprintf("Testing readPgf() with '%s' indices...", name)) idxs <- idxsList[[ii]] str(list(idxs=idxs)) if (grepl("outOfRange", name)) { res <- tryCatch(readPgf(pgf, indices=idxs), error=function(ex) ex) str(res) stopifnot(inherits(res, "error")) } else { data <- readPgf(pgf, indices=idxs) str(head(data)) stopifnot(identical(data$header$chip_type, "HuGene-1_0-st-v1")) J <- if (is.null(idxs)) Jall else length(idxs) stopifnot(length(data$probesetName) == J) } message(sprintf("Testing readPgf() with '%s' indices...done", name)) } # for (ii ...) 
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate correctness of subsets # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - subsetPgf <- function(data, indices=NULL, ...) { if (is.null(indices)) return(data) # Atoms offsets <- data$probesetStartAtom natoms <- diff(c(offsets, length(data0$atomStartProbe)+1L)) offsets <- offsets[indices] natoms <- natoms[indices] # Identify atoms to keep keep <- logical(length(data$atomStartProbe)) for (kk in seq_along(offsets)) { keep[seq(from=offsets[kk], by=1L, length=natoms[kk])] <- TRUE; } for (ff in c("probeSequence", "probeId", "probeGcCount", "atomExonPosition", "atomId", "probeInterrogationPosition", "probeLength", "probeType")) { data[[ff]] <- data[[ff]][keep] } data$atomStartProbe <- seq_len(sum(natoms)) data$probesetStartAtom <- c(1L, cumsum(natoms))[length(indices)] # Probesets for (ff in c("probesetName", "probesetId", "probesetType")) { data[[ff]] <- data[[ff]][indices] } data } # subsetPgf() data0 <- readPgf(pgf) Jall <- length(data0$probesetId) for (kk in 1:10) { n <- sample(Jall, size=1L) idxs <- sort(sample(1:Jall, size=n, replace=FALSE)) data <- readPgf(pgf, indices=idxs) dataS <- subsetPgf(data0, indices=idxs) for (ff in c("probesetStartAtom", "atomExonPosition")) data[[ff]] <- dataS[[ff]] <- NULL stopifnot(all.equal(data, dataS)) } } # if (require("AffymetrixDataTestFiles")) affxparser/tests/testWriteAndReadEmptyCdf.R0000644000175200017520000000240714516003651022073 0ustar00biocbuildbiocbuildsystemR <- function(command="", ..., verbose=FALSE) { # Locate the R executable Rbin <- file.path(R.home("bin"), "R") cmd <- sprintf('%s %s', shQuote(Rbin), command) if (verbose) cat("Command: ", cmd, "\n", sep="") system(cmd, ...) 
} # systemR() ## Explicitly append 'affxparser' to library path ## Needed for covr::coverage() pd <- packageDescription("affxparser") libpath <- dirname(dirname(dirname(attr(pd, "file")))) cmd <- sprintf(' -e ".libPaths(\'%s\'); affxparser:::.testWriteAndReadEmptyCdf()"', libpath) out <- systemR(cmd, intern=TRUE, wait=TRUE, verbose=TRUE) cat(out, sep="\n") res <- any(regexpr("COMPLETE", out) != -1) cat("Test result: ", res, "\n", sep="") if (!res) { stop("affxparser:::.testWriteAndReadEmptyCdf() failed.") } ############################################################################ # HISTORY: # 2012-05-22 # o ROBUSTNESS: Now launching R without assuming it is on the search path, # cf. R-devel thread 'Best way to locate R executable from within R?' # on May 22, 2012. # 2012-05-18 # o Added because of the OSX build bug, cf. # https://groups.google.com/d/topic/aroma-affymetrix/lEfDanThLEA/discussion # o Created. ############################################################################ affxparser/tests/testWriteAndReadEmptyCel.R0000644000175200017520000000177014516003651022104 0ustar00biocbuildbiocbuildsystemR <- function(command="", ..., verbose=FALSE) { # Locate the R executable Rbin <- file.path(R.home("bin"), "R") cmd <- sprintf('%s %s', shQuote(Rbin), command) if (verbose) cat("Command: ", cmd, "\n", sep="") system(cmd, ...) 
} # systemR() ## Explicitly append 'affxparser' to library path ## Needed for covr::coverage() pd <- packageDescription("affxparser") libpath <- dirname(dirname(dirname(attr(pd, "file")))) cmd <- sprintf(' -e ".libPaths(\'%s\'); affxparser:::.testWriteAndReadEmptyCel()"', libpath) out <- systemR(cmd, intern=TRUE, wait=TRUE, verbose=TRUE) cat(out, sep="\n") res <- any(regexpr("COMPLETE", out) != -1) cat("Test result: ", res, "\n", sep="") if (!res) { stop("affxparser:::.testWriteAndReadEmptyCel() failed.") } ############################################################################ # HISTORY: # 2012-09-26 # o Created from tests/testWriteAndReadEmptyCdf.R. ############################################################################ affxparser/tests/torture/0000755000175200017520000000000014516003651016604 5ustar00biocbuildbiocbuildaffxparser/tests/torture/tortureReadCdfUnits.R0000644000175200017520000000440414516003651022671 0ustar00biocbuildbiocbuild############################################################################# # This script will test: # readCdfHeader() # readCdfUnits() ############################################################################# library("affxparser"); library("AffymetrixDataTestFiles"); set.seed(1); logMemory <- exists("memory.size", mode="function"); memSizeRange <- NA; # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Search for a CDF file # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cdfFile <- findCdf("Mapping10K_Xba131"); if (is.null(cdfFile)) stop("No CDF file found"); cat("Found CDF file: ", cdfFile, "\n", sep="") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Start torturing # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - settings <- getOption("affxparser.settings"); tortureCount <- settings$tests$torture; if (is.null(tortureCount)) tortureCount <- 25 for (kk in 1:tortureCount) { cat(sprintf("Round %d 
of %d:\n", kk, tortureCount)); # Sample units to read maxNbrOfUnits <- readCdfHeader(cdfFile)$probesets; nbrOfUnits <- as.integer(runif(n=1, min=0, max=maxNbrOfUnits-0.5)); nbrOfUnits <- as.integer(nbrOfUnits / 4 + 1); units <- sample(maxNbrOfUnits, size=nbrOfUnits); cat(sprintf("Reading %d random units (in [%d,%d]) in random order.\n", nbrOfUnits, min(units), max(units))); # Sample 'stratifyBy' stratifyBy <- c("nothing", "pmmm", "pm", "mm")[sample(4,size=1)]; cat(sprintf("Stratifying by: %s.\n", stratifyBy)); # Reading CDF units t <- system.time({ res <- readCdfUnits(cdfFile, units=units, stratifyBy=stratifyBy, verbose=1); }, gcFirst=FALSE)[3] cat(sprintf("Number of units read: %d\n", length(res))); cat(sprintf("Read time: %.2fs = %.3fs/1000 unit\n", t, 1000*t/length(res))); resSize <- object.size(res); cat(sprintf("Size: %.3gMB = %.2f bytes/unit\n", resSize/1024^2, resSize/length(res))); if (logMemory) { memSize <- memory.size(); memSizeRange <- range(memSizeRange, memSize, na.rm=TRUE); cat(sprintf("Memory size: %.2fMB in [%.2fMB, %.2fMB]\n", memSize/1024^2, memSizeRange[1]/1024^2, memSizeRange[2]/1024^2)); } cat("\n"); rm(res); }
affxparser/tests/torture/tortureReadCel.R0000644000175200017520000000553614516003651021664 0ustar00biocbuildbiocbuild#############################################################################
# This script will test:
#  readCelHeader()
#  readCel()
#
# Each round picks a random Calvin Test3 CEL file, a random non-empty subset
# of its cells and a random combination of the read flags, reads them with
# readCel(), and reports timing and object size.
#############################################################################
library("affxparser")
library("AffymetrixDataTestFiles")
library("R.utils")  # filePath()

# Fixed seed, as in the sibling torture scripts, so that a failing round
# can be reproduced.
set.seed(1)

# memory.size() is a stub that warns and returns Inf on non-Windows builds
# of R and on R (>= 4.2.0); only finite readings are logged below.
logMemory <- exists("memory.size", mode="function")
memSizeRange <- NA

# Search for some available Calvin CEL files
path <- system.file("rawData", package="AffymetrixDataTestFiles")
files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE,
                   firstOnly=FALSE)
files <- grep("FusionSDK_Test3", files, value=TRUE)
files <- grep("Calvin", files, value=TRUE)
celFiles <- files
nbrOfFiles <- length(celFiles)
if (nbrOfFiles == 0) {
  stop("No CEL files found")
}
cat(sprintf("Found %d CEL file(s)\n", nbrOfFiles))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Start torturing
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
settings <- getOption("affxparser.settings")
tortureCount <- settings$tests$torture
if (is.null(tortureCount)) {
  tortureCount <- 25
}

# seq_len() so that a configured count of zero runs no rounds at all
for (kk in seq_len(tortureCount)) {
  cat(sprintf("Round %d of %d:\n", kk, tortureCount))

  # Sample a CEL file
  celFile <- celFiles[sample(nbrOfFiles, size=1)]
  header <- readCelHeader(celFile)
  maxNbrOfCells <- header$rows * header$cols

  # Sample cells to read.  Always draw at least one cell: with zero cells,
  # min()/max() below return Inf/-Inf, which sprintf("%d") rejects.
  nbrOfCells <- sample.int(maxNbrOfCells, size=1)
  cells <- sample.int(maxNbrOfCells, size=nbrOfCells)
  cat(sprintf("Reading %d random cells (in [%d,%d]) in random order from %s.\n",
              nbrOfCells, min(cells), max(cells), celFile))

  # Sample 'readXY', 'readIntensities', 'readStdvs' and 'readPixels'.
  readXY <- sample(c(TRUE,FALSE), size=1)
  readIntensities <- sample(c(TRUE,FALSE), size=1)
  readStdvs <- sample(c(TRUE,FALSE), size=1)
  readPixels <- sample(c(TRUE,FALSE), size=1)
  cat(sprintf("Reading (x,y): %s\n", as.character(readXY)))
  cat(sprintf("Reading intensities: %s\n", as.character(readIntensities)))
  cat(sprintf("Reading stdvs: %s\n", as.character(readStdvs)))
  cat(sprintf("Reading pixels: %s\n", as.character(readPixels)))

  # Reading CEL elements; [3] is the elapsed wall-clock time
  dt <- system.time({
    cel <- readCel(celFile, indices=cells, readXY=readXY,
                   readIntensities=readIntensities, readStdvs=readStdvs,
                   readPixels=readPixels)
  }, gcFirst=FALSE)[3]

  # Per its documentation readCel() returns a list of fields (header,
  # intensities, ...), so length(cel) counts fields, not cells.  The
  # per-cell figures are therefore based on the number of cells requested.
  cat(sprintf("Number of cells read: %d\n", nbrOfCells))
  cat(sprintf("Read time: %.2fs = %.3fs/1000 unit\n", dt, 1000*dt/nbrOfCells))
  celSize <- object.size(cel)
  cat(sprintf("Size: %.3gMB = %.2f bytes/probe\n",
              celSize/1024^2, celSize/nbrOfCells))

  if (logMemory) {
    # memory.size() already reports megabytes; no further rescaling
    memSize <- suppressWarnings(memory.size())
    if (is.finite(memSize)) {
      memSizeRange <- range(memSizeRange, memSize, na.rm=TRUE)
      cat(sprintf("Memory size: %.2fMB in [%.2fMB, %.2fMB]\n",
                  memSize, memSizeRange[1], memSizeRange[2]))
    }
  }

  cat("\n")
  rm(cel)
}
affxparser/tests/torture/tortureReadCelHeader.R0000644000175200017520000000351214516003651022765 0ustar00biocbuildbiocbuild############################################################################# # This script will test: # readCelHeader() ############################################################################# library("affxparser"); library("AffymetrixDataTestFiles"); library("R.utils"); # filePath() set.seed(1); logMemory <- exists("memory.size", mode="function"); memSizeRange <- NA; # Search for some available Calvin CEL files path <- system.file("rawData", package="AffymetrixDataTestFiles") files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE, firstOnly=FALSE) files <- grep("FusionSDK_Test3", files, value=TRUE) files <- grep("Calvin", files, value=TRUE) celFiles <- files nbrOfFiles <- length(celFiles); if (nbrOfFiles == 0) stop("No CEL files found"); cat(sprintf("Found %d CEL file(s)\n", nbrOfFiles)); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Start torturing # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - settings <- getOption("affxparser.settings"); tortureCount <- settings$tests$torture; if (is.null(tortureCount)) tortureCount <- 25 for (kk in 1:tortureCount) { cat(sprintf("Round %d of %d:\n", kk, tortureCount)); # Sample a CEL file celFile <- celFiles[sample(nbrOfFiles, size=1)]; # Reading CEL header t <- system.time({ header <- readCelHeader(celFile); }, gcFirst=FALSE)[3] cat(sprintf("Number header elements read: %d\n", length(header))); cat(sprintf("Read time: %.2fms\n", 1000*t)); size <- object.size(header); cat(sprintf("Size: %.3gkB\n", size/1024)); if (logMemory) { memSize <- memory.size(); memSizeRange <- range(memSizeRange, memSize, na.rm=TRUE); cat(sprintf("Memory size: %.2fMB in [%.2fMB, %.2fMB]\n", memSize/1024^2, memSizeRange[1]/1024^2, memSizeRange[2]/1024^2)); }
cat("\n"); }
affxparser/tests/torture/tortureReadCelUnits.R0000644000175200017520000000660114516003651022701 0ustar00biocbuildbiocbuild#############################################################################
# This script will test:
#  readCelHeader()
#  readCdfHeader()
#  readCelUnits()
# and indirectly:
#  readCdfCellIndices() and readCel()
#
# Each round picks a random Calvin Test3 CEL file, locates its CDF, samples
# a random set of units and reads them with readCelUnits(), reporting
# timing and object size.
#############################################################################
library("affxparser")
library("AffymetrixDataTestFiles")
library("R.utils")  # filePath()

set.seed(1)

# memory.size() is a stub that warns and returns Inf on non-Windows builds
# of R and on R (>= 4.2.0); only finite readings are logged below.
logMemory <- exists("memory.size", mode="function")
memSizeRange <- NA

# Search for some available Calvin CEL files
path <- system.file("rawData", package="AffymetrixDataTestFiles")
files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE,
                   firstOnly=FALSE)
files <- grep("FusionSDK_Test3", files, value=TRUE)
files <- grep("Calvin", files, value=TRUE)
celFiles <- files
nbrOfFiles <- length(celFiles)
if (nbrOfFiles == 0) {
  stop("No CEL files found")
}
cat(sprintf("Found %d CEL file(s)\n", nbrOfFiles))


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Start torturing
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
settings <- getOption("affxparser.settings")
tortureCount <- settings$tests$torture
if (is.null(tortureCount)) {
  tortureCount <- 25
}

# seq_len() so that a configured count of zero runs no rounds at all
for (kk in seq_len(tortureCount)) {
  cat(sprintf("Round %d of %d:\n", kk, tortureCount))

  # Sample a CEL file and locate the CDF for its chip type
  celFile <- celFiles[sample(nbrOfFiles, size=1)]
  chipType <- readCelHeader(celFile)$chiptype
  cdfFile <- findCdf(chipType)
  # findCdf() is documented to return NULL when no CDF is found; fail with
  # a clear message instead of an obscure error inside readCdfHeader().
  if (is.null(cdfFile)) {
    stop("No CDF file found for chip type: ", chipType)
  }
  maxNbrOfUnits <- readCdfHeader(cdfFile)$probesets

  # Sample units to read; the "/ 4 + 1" keeps the count at one or more and
  # at roughly a quarter of the units at most.
  nbrOfUnits <- as.integer(runif(n=1, min=0, max=maxNbrOfUnits-0.5))
  nbrOfUnits <- as.integer(nbrOfUnits / 4 + 1)
  units <- sample.int(maxNbrOfUnits, size=nbrOfUnits)
  cat(sprintf("Reading %d random units (in [%d,%d]) in random order.\n",
              nbrOfUnits, min(units), max(units)))

  # Sample 'readXY', 'readPixels' 'readStdvs', and 'stratifyBy'.
  readXY <- sample(c(TRUE,FALSE), size=1)
  readIntensities <- sample(c(TRUE,FALSE), size=1)
  readStdvs <- sample(c(TRUE,FALSE), size=1)
  readPixels <- sample(c(TRUE,FALSE), size=1)
  stratifyBy <- c("nothing", "pmmm", "pm", "mm")[sample(4,size=1)]

  # NOTE(review): the sampled values above are overridden here, so every
  # round reads all fields without stratification.  The draws are kept so
  # that the random-number stream is unchanged; confirm whether the pinning
  # is still intended before removing it.
  readXY <- TRUE
  readIntensities <- TRUE
  readStdvs <- TRUE
  readPixels <- TRUE
  stratifyBy <- "nothing"

  cat(sprintf("Reading (x,y): %s\n", as.character(readXY)))
  cat(sprintf("Reading intensities: %s\n", as.character(readIntensities)))
  cat(sprintf("Reading stdvs: %s\n", as.character(readStdvs)))
  cat(sprintf("Reading pixels: %s\n", as.character(readPixels)))
  cat(sprintf("Stratifying by: %s\n", stratifyBy))

  # Reading CEL elements; [3] is the elapsed wall-clock time
  # gc();
  dt <- system.time({
    res <- readCelUnits(celFile, units=units, readXY=readXY,
                        readIntensities=readIntensities, readStdvs=readStdvs,
                        readPixels=readPixels, stratifyBy=stratifyBy,
                        verbose=TRUE)
  }, gcFirst=FALSE)[3]
  str(res[1])
  cat(sprintf("Number of units read: %d\n", length(res)))
  cat(sprintf("Read time: %.2fs = %.3fs/1000 unit\n", dt, 1000*dt/length(res)))
  resSize <- object.size(res)
  cat(sprintf("Size: %.3gMB = %.2f bytes/unit\n",
              resSize/1024^2, resSize/length(res)))

  if (logMemory) {
    # memory.size() already reports megabytes; no further rescaling
    memSize <- suppressWarnings(memory.size())
    if (is.finite(memSize)) {
      memSizeRange <- range(memSizeRange, memSize, na.rm=TRUE)
      cat(sprintf("Memory size: %.2fMB in [%.2fMB, %.2fMB]\n",
                  memSize, memSizeRange[1], memSizeRange[2]))
    }
  }

  cat("\n")
  rm(res)
}
affxparser/tests/validate/0000755000175200017520000000000014516003651016671 5ustar00biocbuildbiocbuildaffxparser/tests/validate/validateCelFileFormats.R0000644000175200017520000000421214516003651023364 0ustar00biocbuildbiocbuild########################################################################### # Test for different CEL file formats # # This script loads the Test3 CEL files in ASCII, binary, and in Calvin # formats and asserts that the read data is identical across file formats.
###########################################################################
library("affxparser")
library("AffymetrixDataTestFiles")

# Search for some CEL files
path <- system.file("rawData", package="AffymetrixDataTestFiles")
files <- findFiles(pattern="[.](cel|CEL)$", path=path, recursive=TRUE,
                   firstOnly=FALSE)
files <- grep("FusionSDK_Test3", files, value=TRUE)
nbrOfFiles <- length(files)
if (nbrOfFiles == 0) {
  stop("No CEL files found")
}
cat(sprintf("Found %d CEL file(s)\n", nbrOfFiles))

# Split them up in XDA, Calvin, and ASCII.  A format is identified by its
# name occurring anywhere in the pathname; formats without files are dropped.
celFiles <- list()
for (format in c("XDA", "Calvin", "ASCII")) {
  tmp <- grep(format, files, value=TRUE)
  if (length(tmp) > 0) {
    celFiles[[format]] <- tmp
  }
}
files <- celFiles
print(files)

# With fewer than two formats the comparison loop below never runs and the
# script would succeed without having validated anything.
if (length(files) < 2) {
  stop("Need CEL files in at least two file formats to compare; found: ",
       paste(names(files), collapse=", "))
}

# Read all CEL files
cel <- list()
for (format in names(files)) {
  cel[[format]] <- readCelUnits(files[[format]])
}

# Assert that CEL files for different file formats give identical results
for (format1 in names(cel)) {
  for (format2 in setdiff(names(cel), format1)) {
    stopifnot(identical(cel[[format1]], cel[[format2]]))
  }
}


# Plot spatial distribution intensities

# Transposes the matrix 'x' and reverses the order of the resulting columns.
# A result with fewer than two columns is returned as is.
rotate270 <- function(x, ...) {
  x <- t(x)
  nc <- ncol(x)
  if (nc < 2) return(x)
  x[,nc:1,drop=FALSE]
}

layout(matrix(1:6, ncol=2, byrow=TRUE))
opar <- par(mar=c(1,6,1.5,5)+0.1)
for (format in names(files)) {
  for (pathname in files[[format]]) {
    # Separate name so the list 'cel' compared above is not overwritten
    celRect <- readCelRectangle(pathname)
    z <- rotate270(celRect$intensities)
    image(z, col=gray.colors(256), axes=FALSE)
    title(main=sprintf("%s [%s]", basename(pathname), format), line=+0.75)
    chiptype <- paste("Chip type:", celRect$header$chiptype)
    text(x=0, y=0.5, labels=chiptype, srt=90, adj=c(0.5,-1), xpd=TRUE)
    text(x=0, y=1, labels="(0,0)", adj=c(0,-0.7), cex=0.8, xpd=TRUE)
    lrLabel <- sprintf("(%d,%d)", celRect$header$cols, celRect$header$rows)
    text(x=1, y=0, labels=lrLabel, adj=c(1,1.2), cex=0.8, xpd=TRUE)
  }
}
# Restore the margins and the single-panel layout
par(opar)
layout(1)